vserver 2.0-rc4
[linux-2.6.git] / mm / mlock.c
1 /*
2  *      linux/mm/mlock.c
3  *
4  *  (C) Copyright 1995 Linus Torvalds
5  *  (C) Copyright 2002 Christoph Hellwig
6  */
7
8 #include <linux/mman.h>
9 #include <linux/mm.h>
10 #include <linux/syscalls.h>
11 #include <linux/vs_memory.h>
12
13
14 static int mlock_fixup(struct vm_area_struct * vma, 
15         unsigned long start, unsigned long end, unsigned int newflags)
16 {
17         struct mm_struct * mm = vma->vm_mm;
18         int pages;
19         int ret = 0;
20
21         if (newflags == vma->vm_flags)
22                 goto out;
23
24         if (start != vma->vm_start) {
25                 ret = split_vma(mm, vma, start, 1);
26                 if (ret)
27                         goto out;
28         }
29
30         if (end != vma->vm_end) {
31                 ret = split_vma(mm, vma, end, 0);
32                 if (ret)
33                         goto out;
34         }
35
36         /*
37          * vm_flags is protected by the mmap_sem held in write mode.
38          * It's okay if try_to_unmap_one unmaps a page just after we
39          * set VM_LOCKED, make_pages_present below will bring it back.
40          */
41         vma->vm_flags = newflags;
42
43         /*
44          * Keep track of amount of locked VM.
45          */
46         pages = (end - start) >> PAGE_SHIFT;
47         if (newflags & VM_LOCKED) {
48                 pages = -pages;
49                 if (!(newflags & VM_IO))
50                         ret = make_pages_present(start, end);
51         }
52
53         vx_vmlocked_sub(vma->vm_mm, pages);
54 out:
55         if (ret == -ENOMEM)
56                 ret = -EAGAIN;
57         return ret;
58 }
59
60 static int do_mlock(unsigned long start, size_t len, int on)
61 {
62         unsigned long nstart, end, tmp;
63         struct vm_area_struct * vma, * next;
64         int error;
65
66         len = PAGE_ALIGN(len);
67         end = start + len;
68         if (end < start)
69                 return -EINVAL;
70         if (end == start)
71                 return 0;
72         vma = find_vma(current->mm, start);
73         if (!vma || vma->vm_start > start)
74                 return -ENOMEM;
75
76         for (nstart = start ; ; ) {
77                 unsigned int newflags;
78
79                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
80
81                 newflags = vma->vm_flags | VM_LOCKED;
82                 if (!on)
83                         newflags &= ~VM_LOCKED;
84
85                 if (vma->vm_end >= end) {
86                         error = mlock_fixup(vma, nstart, end, newflags);
87                         break;
88                 }
89
90                 tmp = vma->vm_end;
91                 next = vma->vm_next;
92                 error = mlock_fixup(vma, nstart, tmp, newflags);
93                 if (error)
94                         break;
95                 nstart = tmp;
96                 vma = next;
97                 if (!vma || vma->vm_start != nstart) {
98                         error = -ENOMEM;
99                         break;
100                 }
101         }
102         return error;
103 }
104
105 asmlinkage long sys_mlock(unsigned long start, size_t len)
106 {
107         unsigned long locked, grow;
108         unsigned long lock_limit;
109         int error = -ENOMEM;
110
111         if (!can_do_mlock())
112                 return -EPERM;
113
114         down_write(&current->mm->mmap_sem);
115         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
116         start &= PAGE_MASK;
117
118         grow = len >> PAGE_SHIFT;
119         if (!vx_vmlocked_avail(current->mm, grow))
120                 goto out;
121         locked = current->mm->locked_vm + grow;
122
123         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
124         lock_limit >>= PAGE_SHIFT;
125
126         /* check against resource limits */
127         if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
128                 error = do_mlock(start, len, 1);
129 out:
130         up_write(&current->mm->mmap_sem);
131         return error;
132 }
133
134 asmlinkage long sys_munlock(unsigned long start, size_t len)
135 {
136         int ret;
137
138         down_write(&current->mm->mmap_sem);
139         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
140         start &= PAGE_MASK;
141         ret = do_mlock(start, len, 0);
142         up_write(&current->mm->mmap_sem);
143         return ret;
144 }
145
146 static int do_mlockall(int flags)
147 {
148         struct vm_area_struct * vma;
149         unsigned int def_flags = 0;
150
151         if (flags & MCL_FUTURE)
152                 def_flags = VM_LOCKED;
153         current->mm->def_flags = def_flags;
154         if (flags == MCL_FUTURE)
155                 goto out;
156
157         for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
158                 unsigned int newflags;
159
160                 newflags = vma->vm_flags | VM_LOCKED;
161                 if (!(flags & MCL_CURRENT))
162                         newflags &= ~VM_LOCKED;
163
164                 /* Ignore errors */
165                 mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
166         }
167 out:
168         return 0;
169 }
170
171 asmlinkage long sys_mlockall(int flags)
172 {
173         unsigned long lock_limit;
174         int ret = -EINVAL;
175
176         if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
177                 goto out;
178
179         ret = -EPERM;
180         if (!can_do_mlock())
181                 goto out;
182
183         down_write(&current->mm->mmap_sem);
184
185         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
186         lock_limit >>= PAGE_SHIFT;
187
188         ret = -ENOMEM;
189         if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
190                 goto out;
191         if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
192             capable(CAP_IPC_LOCK))
193                 ret = do_mlockall(flags);
194         up_write(&current->mm->mmap_sem);
195 out:
196         return ret;
197 }
198
199 asmlinkage long sys_munlockall(void)
200 {
201         int ret;
202
203         down_write(&current->mm->mmap_sem);
204         ret = do_mlockall(0);
205         up_write(&current->mm->mmap_sem);
206         return ret;
207 }
208
209 /*
210  * Objects with different lifetime than processes (SHM_LOCK and SHM_HUGETLB
211  * shm segments) get accounted against the user_struct instead.
212  */
213 static DEFINE_SPINLOCK(shmlock_user_lock);
214
215 int user_shm_lock(size_t size, struct user_struct *user)
216 {
217         unsigned long lock_limit, locked;
218         int allowed = 0;
219
220         locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
221         lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
222         lock_limit >>= PAGE_SHIFT;
223         spin_lock(&shmlock_user_lock);
224         if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
225                 goto out;
226         get_uid(user);
227         user->locked_shm += locked;
228         allowed = 1;
229 out:
230         spin_unlock(&shmlock_user_lock);
231         return allowed;
232 }
233
234 void user_shm_unlock(size_t size, struct user_struct *user)
235 {
236         spin_lock(&shmlock_user_lock);
237         user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
238         spin_unlock(&shmlock_user_lock);
239         free_uid(user);
240 }