/*
 *      linux/mm/mlock.c
 *
 *  (C) Copyright 1995 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 */

#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/syscalls.h>
#include <linux/vs_memory.h>
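
/*
 * mlock_fixup - apply the VM_LOCKED setting in newflags to the range
 * [start, end) of a single vma.  The vma is split at the range
 * boundaries if it is not covered exactly; vm_flags and the locked-VM
 * accounting are then updated, and locking (for non-VM_IO mappings)
 * faults the pages in via make_pages_present().  Callers hold
 * mmap_sem for writing.
 */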
static int mlock_fixup(struct vm_area_struct * vma,
        unsigned long start, unsigned long end, unsigned int newflags)
{
        struct mm_struct * mm = vma->vm_mm;
        int pages;
        int ret = 0;

        if (newflags == vma->vm_flags)
                goto out;

        if (start != vma->vm_start) {
                ret = split_vma(mm, vma, start, 1);
                if (ret)
                        goto out;
        }

        if (end != vma->vm_end) {
                ret = split_vma(mm, vma, end, 0);
                if (ret)
                        goto out;
        }

        /*
         * vm_flags is protected by the mmap_sem held in write mode.
         * It's okay if try_to_unmap_one unmaps a page just after we
         * set VM_LOCKED; make_pages_present below will bring it back.
         */
        vma->vm_flags = newflags;

        /*
         * Keep track of the amount of locked VM.
         */
        pages = (end - start) >> PAGE_SHIFT;
        if (newflags & VM_LOCKED) {
                pages = -pages;
                if (!(newflags & VM_IO))
                        ret = make_pages_present(start, end);
        }

        /* vma->vm_mm->locked_vm -= pages; */
        vx_vmlocked_sub(vma->vm_mm, pages);
out:
        if (ret == -ENOMEM)
                ret = -EAGAIN;
        return ret;
}
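
/*
 * do_mlock - walk every vma intersecting [start, start + len) and turn
 * VM_LOCKED on (on != 0) or off through mlock_fixup().  Fails with
 * -ENOMEM if the range is not fully mapped, stopping at the first hole
 * or error.  Called with mmap_sem held for writing.
 */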
static int do_mlock(unsigned long start, size_t len, int on)
{
        unsigned long nstart, end, tmp;
        struct vm_area_struct * vma, * next;
        int error;

        len = PAGE_ALIGN(len);
        end = start + len;
        if (end < start)
                return -EINVAL;
        if (end == start)
                return 0;
        vma = find_vma(current->mm, start);
        if (!vma || vma->vm_start > start)
                return -ENOMEM;

        for (nstart = start ; ; ) {
                unsigned int newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vma->vm_flags | VM_LOCKED;
                if (!on)
                        newflags &= ~VM_LOCKED;

                if (vma->vm_end >= end) {
                        error = mlock_fixup(vma, nstart, end, newflags);
                        break;
                }

                tmp = vma->vm_end;
                next = vma->vm_next;
                error = mlock_fixup(vma, nstart, tmp, newflags);
                if (error)
                        break;
                nstart = tmp;
                vma = next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        break;
                }
        }
        return error;
}
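
/*
 * mlock(2): lock the pages covering [start, start + len) into memory.
 * The range is rounded out to page boundaries, and the resulting
 * locked total is checked against RLIMIT_MEMLOCK unless the caller
 * has CAP_IPC_LOCK.  An illustrative userspace call (not part of this
 * file), assuming the standard libc wrapper:
 *
 *      if (mlock(buf, buflen) != 0)
 *              perror("mlock");
 *
 * where failure is typically EPERM, EAGAIN or ENOMEM.
 */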
asmlinkage long sys_mlock(unsigned long start, size_t len)
{
        unsigned long locked, grow;
        unsigned long lock_limit;
        int error = -ENOMEM;

        if (!can_do_mlock())
                return -EPERM;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;

        grow = len >> PAGE_SHIFT;
        if (!vx_vmlocked_avail(current->mm, grow))
                goto out;
        locked = current->mm->locked_vm + grow;

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        /* check against resource limits */
        if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
                error = do_mlock(start, len, 1);
out:
        up_write(&current->mm->mmap_sem);
        return error;
}
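
/*
 * munlock(2): undo mlock() on [start, start + len).  Unlocking needs
 * neither privilege nor an rlimit check.
 */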
asmlinkage long sys_munlock(unsigned long start, size_t len)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
        start &= PAGE_MASK;
        ret = do_mlock(start, len, 0);
        up_write(&current->mm->mmap_sem);
        return ret;
}
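
/*
 * do_mlockall - set or clear VM_LOCKED on every vma of the current mm.
 * MCL_FUTURE is recorded in mm->def_flags so that new mappings are
 * created locked; when MCL_FUTURE is the only flag, existing vmas are
 * left untouched.  Per-vma errors are deliberately ignored.
 */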
static int do_mlockall(int flags)
{
        struct vm_area_struct * vma;
        unsigned int def_flags = 0;

        if (flags & MCL_FUTURE)
                def_flags = VM_LOCKED;
        current->mm->def_flags = def_flags;
        if (flags == MCL_FUTURE)
                goto out;

        for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
                unsigned int newflags;

                newflags = vma->vm_flags | VM_LOCKED;
                if (!(flags & MCL_CURRENT))
                        newflags &= ~VM_LOCKED;

                /* Ignore errors */
                mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
        }
out:
        return 0;
}
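
/*
 * mlockall(2): lock all current (MCL_CURRENT) and/or all future
 * (MCL_FUTURE) mappings of the calling process.  An illustrative
 * userspace call (not part of this file):
 *
 *      if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0)
 *              perror("mlockall");
 */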
asmlinkage long sys_mlockall(int flags)
{
        unsigned long lock_limit;
        int ret = -EINVAL;

        if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
                goto out;

        ret = -EPERM;
        if (!can_do_mlock())
                goto out;

        down_write(&current->mm->mmap_sem);

        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;

        ret = -ENOMEM;
        if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
                goto out_unlock;
        if (!(flags & MCL_CURRENT) || (current->mm->total_vm <= lock_limit) ||
            capable(CAP_IPC_LOCK))
                ret = do_mlockall(flags);
out_unlock:
        up_write(&current->mm->mmap_sem);
out:
        return ret;
}
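
/*
 * munlockall(2): clear VM_LOCKED on every vma and drop MCL_FUTURE from
 * mm->def_flags so future mappings are no longer created locked.
 */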
asmlinkage long sys_munlockall(void)
{
        int ret;

        down_write(&current->mm->mmap_sem);
        ret = do_mlockall(0);
        up_write(&current->mm->mmap_sem);
        return ret;
}
/*
 * Objects with a lifetime different from that of processes (SHM_LOCK
 * and SHM_HUGETLB shm segments) get accounted against the user_struct
 * instead.
 */
static DEFINE_SPINLOCK(shmlock_user_lock);
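
/*
 * user_shm_lock - charge a locked shm segment of the given size against
 * @user's RLIMIT_MEMLOCK allowance and take a reference on the
 * user_struct.  Returns 1 if the charge was allowed, 0 otherwise.
 */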
int user_shm_lock(size_t size, struct user_struct *user)
{
        unsigned long lock_limit, locked;
        int allowed = 0;

        locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
        lock_limit >>= PAGE_SHIFT;
        spin_lock(&shmlock_user_lock);
        if (locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK))
                goto out;
        get_uid(user);
        user->locked_shm += locked;
        allowed = 1;
out:
        spin_unlock(&shmlock_user_lock);
        return allowed;
}
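
/*
 * user_shm_unlock - undo user_shm_lock(): uncharge the pages and drop
 * the user_struct reference.
 */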
void user_shm_unlock(size_t size, struct user_struct *user)
{
        spin_lock(&shmlock_user_lock);
        user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        spin_unlock(&shmlock_user_lock);
        free_uid(user);
}