[linux-2.6.git] / mm / mlock.c (fedora core 6 1.2949 + vserver 2.2.0)
/*
 *      linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/capability.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mempolicy.h>
#include <linux/syscalls.h>
#include <linux/vs_memory.h>

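/*
 * mlock_fixup - set or clear VM_LOCKED on the range [start, end) of a vma.
 * The vma is merged with a neighbour or split as needed so that the range
 * ends up in a vma carrying exactly newflags, the locked-page accounting is
 * updated, and the pages are faulted in when locking.  Called with mmap_sem
 * held for writing.
 */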
static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
        unsigned long start, unsigned long end, unsigned int newflags)
{
        struct mm_struct * mm = vma->vm_mm;
        pgoff_t pgoff;
        int pages;
        int ret = 0;

        if (newflags == vma->vm_flags) {
                *prev = vma;
                goto out;
        }

        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
                          vma->vm_file, pgoff, vma_policy(vma));
        if (*prev) {
                vma = *prev;
                goto success;
        }

        *prev = vma;

        if (start != vma->vm_start) {
                ret = split_vma(mm, vma, start, 1);
                if (ret)
                        goto out;
        }

        if (end != vma->vm_end) {
                ret = split_vma(mm, vma, end, 0);
                if (ret)
                        goto out;
        }

success:
        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED, make_pages_present below will bring it back.
         */
        vma->vm_flags = newflags;

        /*
         * Keep track of amount of locked VM.
         */
        pages = (end - start) >> PAGE_SHIFT;
        if (newflags & VM_LOCKED) {
                pages = -pages;
                if (!(newflags & VM_IO))
                        ret = make_pages_present(start, end);
        }

        vx_vmlocked_sub(mm, pages);
out:
        if (ret == -ENOMEM)
                ret = -EAGAIN;
        return ret;
}

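/*
 * Walk every vma covering [start, start + len) and apply (on != 0) or clear
 * (on == 0) VM_LOCKED through mlock_fixup().  Fails with -ENOMEM if the
 * range is not fully covered by existing mappings.  Caller holds mmap_sem
 * for writing.
 */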
static int do_mlock(unsigned long start, size_t len, int on)
{
        unsigned long nstart, end, tmp;
        struct vm_area_struct * vma, * prev;
        int error;

        len = PAGE_ALIGN(len);
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (end == start)
                return 0;
        vma = find_vma_prev(current->mm, start, &prev);
        if (!vma || vma->vm_start > start)
                return -ENOMEM;

        if (start > vma->vm_start)
                prev = vma;

        for (nstart = start ; ; ) {
                unsigned int newflags;

                /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */

                newflags = vma->vm_flags | VM_LOCKED;
                if (!on)
                        newflags &= ~VM_LOCKED;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        break;
                nstart = tmp;
                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        break;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        break;
                }
        }
        return error;
}

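/*
 * mlock(2): lock a range of the calling process's address space in memory.
 * The range is rounded out to whole pages, checked against the vserver
 * locked-memory availability hook and RLIMIT_MEMLOCK, and then handed to
 * do_mlock().  CAP_IPC_LOCK overrides the rlimit but not the vserver check.
 */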
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
        unsigned long locked, grow;
        unsigned long lock_limit;
        int error = -ENOMEM;

        if (!can_do_mlock())
                return -EPERM;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;

        grow = len >> PAGE_SHIFT;
        if (!vx_vmlocked_avail(current->mm, grow))
                goto out;
        locked = current->mm->locked_vm + grow;

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                error = do_mlock(start, len, 1);
out:
        up_write(&current->mm->mmap_sem);
        return error;
}

asmlinkage long sys_munlock(unsigned long start, size_t len)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
        ret = do_mlock(start, len, 0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

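/*
 * Implement mlockall()/munlockall(): MCL_FUTURE sets VM_LOCKED in
 * mm->def_flags so that future mappings are created locked; MCL_CURRENT
 * (or flags == 0 for munlockall) walks every existing vma and locks or
 * unlocks it.  Errors from individual vmas are ignored.
 */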
static int do_mlockall(int flags)
{
        struct vm_area_struct * vma, * prev = NULL;
        unsigned int def_flags = 0;

        if (flags & MCL_FUTURE)
                def_flags = VM_LOCKED;
        current->mm->def_flags = def_flags;
        if (flags == MCL_FUTURE)
                goto out;

        for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
                unsigned int newflags;

                newflags = vma->vm_flags | VM_LOCKED;
                if (!(flags & MCL_CURRENT))
                        newflags &= ~VM_LOCKED;

                /* Ignore errors */
                mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
        }
out:
        return 0;
}

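/*
 * mlockall(2): validate the flags, then check the vserver availability hook
 * and RLIMIT_MEMLOCK against the whole address space before calling
 * do_mlockall().  As with mlock(2), CAP_IPC_LOCK overrides only the rlimit.
 */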
asmlinkage long sys_mlockall(int flags)
{
        unsigned long lock_limit;
        int ret = -EINVAL;

        if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
                goto out;

        ret = -EPERM;
        if (!can_do_mlock())
                goto out;

        down_write(&current->mm->mmap_sem);

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        ret = -ENOMEM;
        if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
                goto out_unlock;
        if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
            capable(CAP_IPC_LOCK))
                ret = do_mlockall(flags);
out_unlock:
        up_write(&current->mm->mmap_sem);
out:
        return ret;
}

asmlinkage long sys_munlockall(void)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        ret = do_mlockall(0);
        up_write(&current->mm->mmap_sem);
        return ret;
}

/*
 * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
 * shm segments) get accounted against the user_struct instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);

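/*
 * Charge a locked SysV shm segment to the owning user.  Returns 1 and takes
 * a reference on the user_struct when the request fits within
 * RLIMIT_MEMLOCK (or the caller has CAP_IPC_LOCK), 0 otherwise.
 * user_shm_unlock() drops the charge and the reference again.
 */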
int user_shm_lock(size_t size, struct user_struct *user)
{
        unsigned long lock_limit, locked;
        int allowed = 0;

        locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;
        spin_lock(&shmlock_user_lock);
        if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
                goto out;
        get_uid(user);
        user->locked_shm += locked;
        allowed = 1;
out:
        spin_unlock(&shmlock_user_lock);
        return allowed;
}

void user_shm_unlock(size_t size, struct user_struct *user)
{
        spin_lock(&shmlock_user_lock);
        user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        spin_unlock(&shmlock_user_lock);
        free_uid(user);
}