/*
 *	linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/vs_memory.h>

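/*
 * Make [start, end) of @vma carry exactly @newflags: split the vma at
 * either edge if needed, apply the flags, and update the locked-page
 * accounting.  Caller holds mmap_sem for writing.
 */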
static int mlock_fixup(struct vm_area_struct * vma,
	unsigned long start, unsigned long end, unsigned int newflags)
{
	struct mm_struct * mm = vma->vm_mm;
	int pages;
	int ret = 0;

	if (newflags == vma->vm_flags)
		goto out;

	if (start != vma->vm_start) {
		if (split_vma(mm, vma, start, 1)) {
			ret = -EAGAIN;
			goto out;
		}
	}

	if (end != vma->vm_end) {
		if (split_vma(mm, vma, end, 0)) {
			ret = -EAGAIN;
			goto out;
		}
	}

	/*
	 * vm_flags is protected by the mmap_sem held in write mode.
	 * It's okay if try_to_unmap_one unmaps a page just after we
	 * set VM_LOCKED; make_pages_present() below will bring it back.
	 */
	vma->vm_flags = newflags;

	/*
	 * Keep track of the amount of locked VM.
	 */
	pages = (end - start) >> PAGE_SHIFT;
	if (newflags & VM_LOCKED) {
		pages = -pages;
		ret = make_pages_present(start, end);
	}

	/* equivalent of: vma->vm_mm->locked_vm -= pages; */
	vx_vmlocked_sub(vma->vm_mm, pages);
out:
	return ret;
}

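/*
 * Set or clear VM_LOCKED on every vma intersecting [start, start+len).
 * The range must be fully mapped; a hole makes the walk fail with
 * -ENOMEM, leaving the already-visited vmas changed.
 */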
static int do_mlock(unsigned long start, size_t len, int on)
{
	unsigned long nstart, end, tmp;
	struct vm_area_struct * vma, * next;
	int error;

	len = PAGE_ALIGN(len);
	end = start + len;
	if (end < start)
		return -EINVAL;
	if (end == start)
		return 0;
	vma = find_vma(current->mm, start);
	if (!vma || vma->vm_start > start)
		return -ENOMEM;

	for (nstart = start ; ; ) {
		unsigned int newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		newflags = vma->vm_flags | VM_LOCKED;
		if (!on)
			newflags &= ~VM_LOCKED;

		if (vma->vm_end >= end) {
			error = mlock_fixup(vma, nstart, end, newflags);
			break;
		}

		/* This vma ends short of the request; fix up what it
		 * covers and carry on with the next vma. */
		tmp = vma->vm_end;
		next = vma->vm_next;
		error = mlock_fixup(vma, nstart, tmp, newflags);
		if (error)
			break;
		nstart = tmp;
		vma = next;
		if (!vma || vma->vm_start != nstart) {
			/* hole in the address range */
			error = -ENOMEM;
			break;
		}
	}
	return error;
}

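/*
 * mlock(2): lock a range of the caller's address space in memory,
 * subject to RLIMIT_MEMLOCK (bypassable with CAP_IPC_LOCK) and,
 * through vx_vmlocked_avail(), to the vserver context's limit.
 */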
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
	unsigned long locked, grow;
	unsigned long lock_limit;
	int error = -ENOMEM;

	if (!can_do_mlock())
		return -EPERM;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;

	grow = len >> PAGE_SHIFT;
	if (!vx_vmlocked_avail(current->mm, grow))
		goto out;
	locked = current->mm->locked_vm + grow;

	lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	/* check against resource limits */
	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
		error = do_mlock(start, len, 1);
out:
	up_write(&current->mm->mmap_sem);
	return error;
}

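/*
 * munlock(2): unlock a range.  Unlocking needs no privilege or limit
 * checks.
 */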
asmlinkage long sys_munlock(unsigned long start, size_t len)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
	start &= PAGE_MASK;
	ret = do_mlock(start, len, 0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

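/*
 * Apply MCL_CURRENT (all existing vmas) and/or MCL_FUTURE (mm->def_flags
 * for future mappings).  A flags value of 0 clears both, which is how
 * sys_munlockall() uses this helper.
 */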
static int do_mlockall(int flags)
{
	struct vm_area_struct * vma;
	unsigned int def_flags = 0;

	if (flags & MCL_FUTURE)
		def_flags = VM_LOCKED;
	current->mm->def_flags = def_flags;
	if (flags == MCL_FUTURE)
		goto out;

	for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
		unsigned int newflags;

		newflags = vma->vm_flags | VM_LOCKED;
		if (!(flags & MCL_CURRENT))
			newflags &= ~VM_LOCKED;

		/* Ignore errors */
		mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
	}
out:
	return 0;
}

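/*
 * mlockall(2): lock the whole address space, with total_vm checked
 * against RLIMIT_MEMLOCK unless only MCL_FUTURE is requested or the
 * caller has CAP_IPC_LOCK.
 */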
asmlinkage long sys_mlockall(int flags)
{
	unsigned long lock_limit;
	int ret = -EINVAL;

	if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
		goto out;

	ret = -EPERM;
	if (!can_do_mlock())
		goto out;

	down_write(&current->mm->mmap_sem);

	lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;

	ret = -ENOMEM;
	if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
		goto out_unlock;
	if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
	    capable(CAP_IPC_LOCK))
		ret = do_mlockall(flags);
out_unlock:
	up_write(&current->mm->mmap_sem);
out:
	return ret;
}

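/*
 * munlockall(2): clear both MCL_CURRENT and MCL_FUTURE state.
 */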
asmlinkage long sys_munlockall(void)
{
	int ret;

	down_write(&current->mm->mmap_sem);
	ret = do_mlockall(0);
	up_write(&current->mm->mmap_sem);
	return ret;
}

/*
 * Objects whose lifetime differs from that of a process (SHM_LOCK and
 * SHM_HUGETLB shm segments) get accounted against the user_struct
 * instead.
 */
static spinlock_t shmlock_user_lock = SPIN_LOCK_UNLOCKED;

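/*
 * Charge a locked shm segment against the user's RLIMIT_MEMLOCK.
 * Returns 1 and takes a uid reference on success, 0 if the limit would
 * be exceeded.
 */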
int user_shm_lock(size_t size, struct user_struct *user)
{
	unsigned long lock_limit, locked;
	int allowed = 0;

	spin_lock(&shmlock_user_lock);
	locked = size >> PAGE_SHIFT;
	lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
	lock_limit >>= PAGE_SHIFT;
	if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
		goto out;
	get_uid(user);
	user->locked_shm += locked;
	allowed = 1;
out:
	spin_unlock(&shmlock_user_lock);
	return allowed;
}

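/*
 * Undo user_shm_lock(): return the pages to the user's quota and drop
 * the uid reference taken when the segment was locked.
 */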
void user_shm_unlock(size_t size, struct user_struct *user)
{
	spin_lock(&shmlock_user_lock);
	user->locked_shm -= (size >> PAGE_SHIFT);
	spin_unlock(&shmlock_user_lock);
	free_uid(user);
}