/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code       <alan@redhat.com>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

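/*
 * Update the protection bits of every present pte covered by one pmd,
 * for the range [address, address + size).
 */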
static inline void
change_pte_range(pmd_t *pmd, unsigned long address,
                unsigned long size, pgprot_t newprot)
{
        pte_t * pte;
        unsigned long end;

        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
                pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return;
        }
        pte = pte_offset_map(pmd, address);
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                if (pte_present(*pte)) {
                        pte_t entry;

                        /* Avoid an SMP race with hardware updated dirty/clean
                         * bits by wiping the pte and then setting the new pte
                         * into place.
                         */
                        entry = ptep_get_and_clear(pte);
                        set_pte(pte, pte_modify(entry, newprot));
                }
                address += PAGE_SIZE;
                pte++;
        } while (address && (address < end));
        pte_unmap(pte - 1);
}

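/*
 * Walk the pmds under one pgd entry and apply the new protection to the
 * pte range each one maps.
 */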
static inline void
change_pmd_range(pgd_t *pgd, unsigned long address,
                unsigned long size, pgprot_t newprot)
{
        pmd_t * pmd;
        unsigned long end;

        if (pgd_none(*pgd))
                return;
        if (pgd_bad(*pgd)) {
                pgd_ERROR(*pgd);
                pgd_clear(pgd);
                return;
        }
        pmd = pmd_offset(pgd, address);
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                change_pte_range(pmd, address, end - address, newprot);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address && (address < end));
}

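/*
 * Rewrite the page tables for [start, end) with newprot, flushing the
 * caches beforehand and the TLB afterwards.  Called with mmap_sem held
 * for writing; takes page_table_lock around the page table walk.
 */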
static void
change_protection(struct vm_area_struct *vma, unsigned long start,
                unsigned long end, pgprot_t newprot)
{
        pgd_t *dir;
        unsigned long beg = start;

        dir = pgd_offset(current->mm, start);
        flush_cache_range(vma, beg, end);
        if (start >= end)
                BUG();
        spin_lock(&current->mm->page_table_lock);
        do {
                change_pmd_range(dir, start, end - start, newprot);
                start = (start + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        } while (start && (start < end));
        flush_tlb_range(vma, beg, end);
        spin_unlock(&current->mm->page_table_lock);
        return;
}

/*
 * Try to merge the vma with the previous vma under the new flags.
 * Return 1 if the merge succeeded, or 0 if it was impossible.
 */
static int
mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev,
                unsigned long end, int newflags)
{
        struct mm_struct * mm;

        if (!prev || !vma)
                return 0;
        mm = vma->vm_mm;
        if (prev->vm_end != vma->vm_start)
                return 0;
        if (!can_vma_merge(prev, newflags))
                return 0;
        if (vma->vm_file || (vma->vm_flags & VM_SHARED))
                return 0;

        /*
         * If the whole area changes to the protection of the previous one
         * we can just get rid of it.
         */
        if (end == vma->vm_end) {
                spin_lock(&mm->page_table_lock);
                prev->vm_end = end;
                __vma_unlink(mm, vma, prev);
                spin_unlock(&mm->page_table_lock);

                kmem_cache_free(vm_area_cachep, vma);
                mm->map_count--;
                return 1;
        }

        /*
         * Otherwise extend it.
         */
        spin_lock(&mm->page_table_lock);
        prev->vm_end = end;
        vma->vm_start = end;
        spin_unlock(&mm->page_table_lock);
        return 1;
}

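/*
 * Apply newflags to the region [start, end) of vma, splitting the vma at
 * start and/or end as needed, charging for any newly writable private
 * pages, and updating the hardware page protections.
 */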
static int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        unsigned long start, unsigned long end, unsigned int newflags)
{
        struct mm_struct * mm = vma->vm_mm;
        unsigned long charged = 0;
        pgprot_t newprot;
        int error;

        if (newflags == vma->vm_flags) {
                *pprev = vma;
                return 0;
        }

        /*
         * If we make a private mapping writable we increase our commit;
         * but (without finer accounting) cannot reduce our commit if we
         * make it unwritable again.
         *
         * FIXME? We haven't defined a VM_NORESERVE flag, so mprotecting
         * a MAP_NORESERVE private mapping to writable will now reserve.
         */
        if (newflags & VM_WRITE) {
                if (!(vma->vm_flags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))
                                && VM_MAYACCT(vma)) {
                        charged = (end - start) >> PAGE_SHIFT;
                        if (security_vm_enough_memory(charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
        }

        newprot = protection_map[newflags & 0xf];

        if (start == vma->vm_start) {
                /*
                 * Try to merge with the previous vma.
                 */
                if (mprotect_attempt_merge(vma, *pprev, end, newflags)) {
                        vma = *pprev;
                        goto success;
                }
        } else {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto fail;
        }
        /*
         * Unless it returns an error, this function always sets *pprev to
         * the first vma for which vma->vm_end >= end.
         */
        *pprev = vma;

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto fail;
        }

        spin_lock(&mm->page_table_lock);
        vma->vm_flags = newflags;
        vma->vm_page_prot = newprot;
        spin_unlock(&mm->page_table_lock);
success:
        change_protection(vma, start, end, newprot);
        return 0;

fail:
        vm_unacct_memory(charged);
        return error;
}

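/*
 * mprotect(2): change the protection of len bytes starting at start.
 * Walks every vma in the range and applies the new flags to each via
 * mprotect_fixup(), under a write hold on mmap_sem.
 */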
asmlinkage long
sys_mprotect(unsigned long start, size_t len, unsigned long prot)
{
        unsigned long vm_flags, nstart, end, tmp;
        struct vm_area_struct * vma, * next, * prev;
        int error = -EINVAL;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        len = PAGE_ALIGN(len);
        end = start + len;
        if (end < start)
                return -ENOMEM;
        if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM))
                return -EINVAL;
        if (end == start)
                return 0;

        vm_flags = calc_vm_prot_bits(prot);

        down_write(&current->mm->mmap_sem);

        vma = find_vma_prev(current->mm, start, &prev);
        error = -ENOMEM;
        if (!vma)
                goto out;
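        /*
         * PROT_GROWSDOWN/PROT_GROWSUP extend the affected range to the
         * start or end of the growable vma, so that the whole stack
         * area gets the new protection.
         */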
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
                start = vma->vm_start;
                error = -EINVAL;
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
        } else {
                if (vma->vm_start > start)
                        goto out;
                if (unlikely(grows & PROT_GROWSUP)) {
                        end = vma->vm_end;
                        error = -EINVAL;
                        if (!(vma->vm_flags & VM_GROWSUP))
                                goto out;
                }
        }

        for (nstart = start ; ; ) {
                unsigned int newflags;
                int last = 0;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                if (is_vm_hugetlb_page(vma)) {
                        error = -EACCES;
                        goto out;
                }

                newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

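                /*
                 * The VM_MAY* bits sit four bits above the corresponding
                 * VM_READ/VM_WRITE/VM_EXEC bits, so this rejects any
                 * requested protection the mapping may not be given.
                 */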
                if ((newflags & ~(newflags >> 4)) & 0xf) {
                        error = -EACCES;
                        goto out;
                }

                error = security_file_mprotect(vma, prot);
                if (error)
                        goto out;

                if (vma->vm_end > end) {
                        error = mprotect_fixup(vma, &prev, nstart, end, newflags);
                        goto out;
                }
                if (vma->vm_end == end)
                        last = 1;

                tmp = vma->vm_end;
                next = vma->vm_next;
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        goto out;
                if (last)
                        break;
                nstart = tmp;
                vma = next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        goto out;
                }
        }

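        /*
         * The final fixup may have left prev and the following vma with
         * identical flags; fold them together if they are mergeable
         * anonymous private mappings.
         */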
        if (next && prev->vm_end == next->vm_start &&
                        can_vma_merge(next, prev->vm_flags) &&
                        !prev->vm_file && !(prev->vm_flags & VM_SHARED)) {
                spin_lock(&prev->vm_mm->page_table_lock);
                prev->vm_end = next->vm_end;
                __vma_unlink(prev->vm_mm, next, prev);
                spin_unlock(&prev->vm_mm->page_table_lock);

                kmem_cache_free(vm_area_cachep, next);
                prev->vm_mm->map_count--;
        }
out:
        up_write(&current->mm->mmap_sem);
        return error;
}