ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
/*
 *  linux/mm/nommu.c
 *
 *  Replacement code for mm functions to support CPUs that don't
 *  have any form of memory management unit (thus no virtual memory).
 *
 *  Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
 *  Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
 *  Copyright (c) 2002      Greg Ungerer <gerg@snapgear.com>
 */

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/blkdev.h>

#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>

void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long askedalloc, realalloc;
atomic_t vm_committed_space = ATOMIC_INIT(0);
int sysctl_overcommit_memory; /* default is heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */

/*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 */
int vmtruncate(struct inode *inode, loff_t offset)
{
        struct address_space *mapping = inode->i_mapping;
        unsigned long limit;

        if (inode->i_size < offset)
                goto do_expand;
        i_size_write(inode, offset);

        truncate_inode_pages(mapping, offset);
        goto out_truncate;

do_expand:
        limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
        if (limit != RLIM_INFINITY && offset > limit)
                goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
                goto out;
        i_size_write(inode, offset);

out_truncate:
        if (inode->i_op && inode->i_op->truncate)
                inode->i_op->truncate(inode);
        return 0;
out_sig:
        send_sig(SIGXFSZ, current, 0);
out:
        return -EFBIG;
}
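
/*
 * Note on the two paths above: the shrink path installs the new i_size
 * and then drops any page-cache pages beyond the new end of file; the
 * expand path only grows i_size after checking the caller's
 * RLIMIT_FSIZE (raising SIGXFSZ on overrun) and the filesystem's
 * s_maxbytes limit.
 */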

/*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
 *
 * Doesn't have to be accurate, i.e. may have races.
 */
unsigned int kobjsize(const void *objp)
{
        struct page *page;

        if (!objp || !(page = virt_to_page(objp)))
                return 0;

        if (PageSlab(page))
                return ksize(objp);

        BUG_ON(page->index < 0);
        BUG_ON(page->index >= MAX_ORDER);

        return PAGE_SIZE << page->index;
}
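
/*
 * Illustration (values are examples only): kmalloc(1000) is normally
 * satisfied from the size-1024 slab cache, so kobjsize() reports
 * ksize() == 1024 even though the caller asked for 1000.  For non-slab
 * pages this code assumes the !CONFIG_MMU page allocator records the
 * allocation order in page->index, hence PAGE_SIZE << page->index.
 */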

/*
 * The nommu dodgy version :-)
 */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int len, int write, int force,
        struct page **pages, struct vm_area_struct **vmas)
{
        int i;
        static struct vm_area_struct dummy_vma;

        for (i = 0; i < len; i++) {
                if (pages) {
                        pages[i] = virt_to_page(start);
                        if (pages[i])
                                page_cache_get(pages[i]);
                }
                if (vmas)
                        vmas[i] = &dummy_vma;
                start += PAGE_SIZE;
        }
        return i;
}
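
/*
 * Without an MMU, user addresses are kernel addresses, so the loop
 * above can translate each page directly with virt_to_page() and pin
 * it with page_cache_get() -- no fault handling, VMA lookups or access
 * checks are performed, hence "dodgy".
 */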

rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct *vmlist;

void vfree(void *addr)
{
        kfree(addr);
}

void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot)
{
        /*
         * kmalloc doesn't like __GFP_HIGHMEM for some reason
         */
        return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
}
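
/*
 * On !CONFIG_MMU there is no separate vmalloc address space, so
 * __vmalloc() degenerates to kmalloc(): the memory it returns is
 * physically contiguous and bounded by the largest kmalloc size.
 */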

struct page *vmalloc_to_page(void *addr)
{
        return virt_to_page(addr);
}

long vread(char *buf, char *addr, unsigned long count)
{
        memcpy(buf, addr, count);
        return count;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        memcpy(addr, buf, count);
        return count;
}
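
/*
 * Worked example of the overflow clamp above (32-bit, illustrative
 * values): with addr = 0xfffff000 and count = 0x2000, addr + count
 * wraps to 0x1000, which is < count, so count is clamped to
 * -(unsigned long)addr = 0x1000 -- exactly the bytes remaining before
 * the top of the address space.
 */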

/*
 *      vmalloc  -  allocate virtually contiguous memory
 *
 *      @size:          allocation size
 *
 *      Allocate enough pages to cover @size from the page level
 *      allocator and map them into contiguous kernel virtual space.
 *
 *      For tight control over page level allocator and protection flags
 *      use __vmalloc() instead.
 */
void *vmalloc(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
}

/*
 *      vmalloc_32  -  allocate virtually contiguous memory (32bit addressable)
 *
 *      @size:          allocation size
 *
 *      Allocate enough 32bit physically addressable pages to cover @size
 *      from the page level allocator and map them into contiguous kernel
 *      virtual space.
 */
void *vmalloc_32(unsigned long size)
{
        return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL);
}

void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot)
{
        BUG();
        return NULL;
}

void vunmap(void *addr)
{
        BUG();
}

/*
 *  sys_brk() for the most part doesn't need the global kernel
 *  lock, except when an application is doing something nasty
 *  like trying to un-brk an area that has already been mapped
 *  to a regular file.  In that case, the unmapping will need
 *  to invoke filesystem routines that need the global lock.
 */
asmlinkage unsigned long sys_brk(unsigned long brk)
{
        struct mm_struct *mm = current->mm;

        if (brk < mm->end_code || brk < mm->start_brk || brk > mm->context.end_brk)
                return mm->brk;

        if (mm->brk == brk)
                return mm->brk;

        /*
         * Always allow shrinking brk
         */
        if (brk <= mm->brk) {
                mm->brk = brk;
                return brk;
        }

        /*
         * Ok, looks good - let it rip.
         */
        return mm->brk = brk;
}
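
/*
 * The brk area here is a fixed window, [mm->start_brk,
 * mm->context.end_brk], presumably reserved when the binary was set
 * up; growing the heap within that window therefore only moves the
 * mm->brk watermark and never allocates or frees pages.
 */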

/*
 * Combine the mmap "prot" and "flags" arguments into one "vm_flags" used
 * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
 * into "VM_xxx".
 */
static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
{
#define _trans(x,bit1,bit2) \
((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)

        unsigned long prot_bits, flag_bits;
        prot_bits =
                _trans(prot, PROT_READ, VM_READ) |
                _trans(prot, PROT_WRITE, VM_WRITE) |
                _trans(prot, PROT_EXEC, VM_EXEC);
        flag_bits =
                _trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
                _trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
                _trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
        return prot_bits | flag_bits;
#undef _trans
}
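
/*
 * How _trans() behaves, with illustrative bit values: when PROT_READ
 * and VM_READ are both 0x1, the bit is passed straight through as
 * (x & 0x1); if they differed (say 0x1 vs 0x8), the macro would expand
 * to ((x & 0x1) ? 0x8 : 0), testing the source bit and emitting the
 * translated one.
 */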

#ifdef DEBUG
static void show_process_blocks(void)
{
        struct mm_tblock_struct *tblock;

        printk("Process blocks %d:", current->pid);

        for (tblock = &current->mm->context.tblock; tblock; tblock = tblock->next) {
                printk(" %p: %p", tblock, tblock->rblock);
                if (tblock->rblock)
                        printk(" (%d @%p #%d)", kobjsize(tblock->rblock->kblock),
                                        tblock->rblock->kblock, tblock->rblock->refcount);
                printk(tblock->next ? " ->" : ".\n");
        }
}
#endif /* DEBUG */

unsigned long do_mmap_pgoff(
        struct file *file,
        unsigned long addr,
        unsigned long len,
        unsigned long prot,
        unsigned long flags,
        unsigned long pgoff)
{
        void *result;
        struct mm_tblock_struct *tblock;
        unsigned int vm_flags;

        /*
         * Get the !CONFIG_MMU specific checks done first
         */
        if ((flags & MAP_SHARED) && (prot & PROT_WRITE) && (file)) {
                printk("MAP_SHARED not supported (cannot write mappings to disk)\n");
                return -EINVAL;
        }

        if ((prot & PROT_WRITE) && (flags & MAP_PRIVATE)) {
                printk("Private writable mappings not supported\n");
                return -EINVAL;
        }

        /*
         * Now all the standard checks
         */
        if (file && (!file->f_op || !file->f_op->mmap))
                return -ENODEV;

        if (PAGE_ALIGN(len) == 0)
                return addr;

        if (len > TASK_SIZE)
                return -EINVAL;

        /* offset overflow? */
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                return -EINVAL;

        /* Do simple checking here so the lower-level routines won't have
         * to.  We assume access permissions have been handled by the open
         * of the memory object, so we don't do any here.
         */
        vm_flags = calc_vm_flags(prot, flags) /* | mm->def_flags */
                        | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;

        /*
         * Determine the object being mapped and call the appropriate
         * specific mapper.
         */
        if (file) {
                struct vm_area_struct vma;
                int error;

                if (!file->f_op)
                        return -ENODEV;

                vma.vm_start = addr;
                vma.vm_end = addr + len;
                vma.vm_flags = vm_flags;
                vma.vm_pgoff = pgoff;

#ifdef MAGIC_ROM_PTR
                /* First, try the simpler routine designed to give us a ROM pointer. */
                if (file->f_op->romptr && !(prot & PROT_WRITE)) {
                        error = file->f_op->romptr(file, &vma);
#ifdef DEBUG
                        printk("romptr mmap returned %d, start 0x%.8lx\n", error,
                                        vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
#endif /* MAGIC_ROM_PTR */
                /* Then try the full mmap routine, which might return a RAM
                   pointer, or do something truly complicated. */
                if (file->f_op->mmap) {
                        error = file->f_op->mmap(file, &vma);

#ifdef DEBUG
                        printk("f_op->mmap() returned %d/%lx\n", error, vma.vm_start);
#endif
                        if (!error)
                                return vma.vm_start;
                        else if (error != -ENOSYS)
                                return error;
                } else
                        return -ENODEV; /* No mapping operations defined */

                /* An ENOSYS error indicates that mmap isn't possible (as
                   opposed to tried but failed), so we fall through to the
                   copy below. */
        }

        tblock = (struct mm_tblock_struct *)
                        kmalloc(sizeof(struct mm_tblock_struct), GFP_KERNEL);
        if (!tblock) {
                printk("Allocation of tblock for %lu byte allocation from process %d failed\n",
                                len, current->pid);
                show_free_areas();
                return -ENOMEM;
        }

        tblock->rblock = (struct mm_rblock_struct *)
                        kmalloc(sizeof(struct mm_rblock_struct), GFP_KERNEL);
        if (!tblock->rblock) {
                printk("Allocation of rblock for %lu byte allocation from process %d failed\n",
                                len, current->pid);
                show_free_areas();
                kfree(tblock);
                return -ENOMEM;
        }

        result = kmalloc(len, GFP_KERNEL);
        if (!result) {
                printk("Allocation of length %lu from process %d failed\n", len,
                                current->pid);
                show_free_areas();
                kfree(tblock->rblock);
                kfree(tblock);
                return -ENOMEM;
        }

        tblock->rblock->refcount = 1;
        tblock->rblock->kblock = result;
        tblock->rblock->size = len;

        realalloc += kobjsize(result);
        askedalloc += len;

#ifdef WARN_ON_SLACK
        if ((len + WARN_ON_SLACK) <= kobjsize(result))
                printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
                                len, current->pid, kobjsize(result) - len);
#endif

        if (file) {
                int error;
                mm_segment_t old_fs = get_fs();

                set_fs(KERNEL_DS);
                error = file->f_op->read(file, (char *) result, len, &file->f_pos);
                set_fs(old_fs);
                if (error < 0) {
                        kfree(result);
                        kfree(tblock->rblock);
                        kfree(tblock);
                        return error;
                }
                if (error < len)
                        memset(result + error, '\0', len - error);
        } else {
                memset(result, '\0', len);
        }

        realalloc += kobjsize(tblock);
        askedalloc += sizeof(struct mm_tblock_struct);

        realalloc += kobjsize(tblock->rblock);
        askedalloc += sizeof(struct mm_rblock_struct);

        tblock->next = current->mm->context.tblock.next;
        current->mm->context.tblock.next = tblock;

#ifdef DEBUG
        printk("do_mmap:\n");
        show_process_blocks();
#endif

        return (unsigned long)result;
}
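
/*
 * Sketch of the fallback path above for, say, an 8192-byte private
 * read-only file mapping whose f_op->mmap() returns -ENOSYS: a tblock
 * and an rblock are allocated to track the region, kmalloc(8192)
 * provides the backing store, the file contents are read into it (with
 * the tail zero-filled), and the kernel pointer itself is returned as
 * the mapping address.
 */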

int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
{
        struct mm_tblock_struct *tblock, *tmp;

#ifdef MAGIC_ROM_PTR
        /*
         * For efficiency's sake, if the pointer is obviously in ROM,
         * don't bother walking the lists to free it.
         */
        if (is_in_rom(addr))
                return 0;
#endif

#ifdef DEBUG
        printk("do_munmap:\n");
#endif

        tmp = &mm->context.tblock; /* dummy head */
        while ((tblock = tmp->next) && tblock->rblock &&
                        tblock->rblock->kblock != (void *)addr)
                tmp = tblock;

        if (!tblock) {
                printk("munmap of non-mmaped memory by process %d (%s): %p\n",
                                current->pid, current->comm, (void *)addr);
                return -EINVAL;
        }
        if (tblock->rblock) {
                if (!--tblock->rblock->refcount) {
                        if (tblock->rblock->kblock) {
                                realalloc -= kobjsize(tblock->rblock->kblock);
                                askedalloc -= tblock->rblock->size;
                                kfree(tblock->rblock->kblock);
                        }

                        realalloc -= kobjsize(tblock->rblock);
                        askedalloc -= sizeof(struct mm_rblock_struct);
                        kfree(tblock->rblock);
                }
        }
        tmp->next = tblock->next;
        realalloc -= kobjsize(tblock);
        askedalloc -= sizeof(struct mm_tblock_struct);
        kfree(tblock);

#ifdef DEBUG
        show_process_blocks();
#endif

        return 0; /* the block was found and freed, so report success */
}

/* Release all mmaps. */
void exit_mmap(struct mm_struct *mm)
{
        struct mm_tblock_struct *tmp;

        if (!mm)
                return;

#ifdef DEBUG
        printk("Exit_mmap:\n");
#endif

        while ((tmp = mm->context.tblock.next)) {
                if (tmp->rblock) {
                        if (!--tmp->rblock->refcount) {
                                if (tmp->rblock->kblock) {
                                        realalloc -= kobjsize(tmp->rblock->kblock);
                                        askedalloc -= tmp->rblock->size;
                                        kfree(tmp->rblock->kblock);
                                }
                                realalloc -= kobjsize(tmp->rblock);
                                askedalloc -= sizeof(struct mm_rblock_struct);
                                kfree(tmp->rblock);
                        }
                        tmp->rblock = NULL;
                }
                mm->context.tblock.next = tmp->next;
                realalloc -= kobjsize(tmp);
                askedalloc -= sizeof(struct mm_tblock_struct);
                kfree(tmp);
        }

#ifdef DEBUG
        show_process_blocks();
#endif
}

asmlinkage long sys_munmap(unsigned long addr, size_t len)
{
        int ret;
        struct mm_struct *mm = current->mm;

        down_write(&mm->mmap_sem);
        ret = do_munmap(mm, addr, len);
        up_write(&mm->mmap_sem);
        return ret;
}
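
/*
 * The remaining entry points only make sense with an MMU; they are
 * stubbed out here so that common code still links, either failing
 * outright or reporting "not found".
 */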

unsigned long do_brk(unsigned long addr, unsigned long len)
{
        return -ENOMEM;
}

struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
        return NULL;
}

struct page *follow_page(struct mm_struct *mm, unsigned long addr, int write)
{
        return NULL;
}

struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
        return NULL;
}

int remap_page_range(struct vm_area_struct *vma, unsigned long from,
                unsigned long to, unsigned long size, pgprot_t prot)
{
        return -EPERM;
}

unsigned long get_unmapped_area(struct file *file, unsigned long addr,
        unsigned long len, unsigned long pgoff, unsigned long flags)
{
        return -ENOMEM;
}

void pte_chain_init(void)
{
}