Merge to Fedora kernel-2.6.7-1.441
[linux-2.6.git] / mm / mlock.c
1 /*
2  *      linux/mm/mlock.c
3  *
4  *  (C) Copyright 1995 Linus Torvalds
5  *  (C) Copyright 2002 Christoph Hellwig
6  */
7
8 #include <linux/mman.h>
9 #include <linux/mm.h>
10 #include <linux/vs_memory.h>
11
12
13 static int mlock_fixup(struct vm_area_struct * vma, 
14         unsigned long start, unsigned long end, unsigned int newflags)
15 {
16         struct mm_struct * mm = vma->vm_mm;
17         int pages;
18         int ret = 0;
19
20         if (newflags == vma->vm_flags)
21                 goto out;
22
23         if (start != vma->vm_start) {
24                 if (split_vma(mm, vma, start, 1)) {
25                         ret = -EAGAIN;
26                         goto out;
27                 }
28         }
29
30         if (end != vma->vm_end) {
31                 if (split_vma(mm, vma, end, 0)) {
32                         ret = -EAGAIN;
33                         goto out;
34                 }
35         }
36
37         /*
38          * vm_flags is protected by the mmap_sem held in write mode.
39          * It's okay if try_to_unmap_one unmaps a page just after we
40          * set VM_LOCKED, make_pages_present below will bring it back.
41          */
42         vma->vm_flags = newflags;
43
44         /*
45          * Keep track of amount of locked VM.
46          */
47         pages = (end - start) >> PAGE_SHIFT;
48         if (newflags & VM_LOCKED) {
49                 pages = -pages;
50                 ret = make_pages_present(start, end);
51         }
52
53         vma->vm_mm->locked_vm -= pages;
54 out:
55         return ret;
56 }
57
58 static int do_mlock(unsigned long start, size_t len, int on)
59 {
60         unsigned long nstart, end, tmp;
61         struct vm_area_struct * vma, * next;
62         int error;
63
64         if (on && !capable(CAP_IPC_LOCK))
65                 return -EPERM;
66         len = PAGE_ALIGN(len);
67         end = start + len;
68         if (end < start)
69                 return -EINVAL;
70         if (end == start)
71                 return 0;
72         vma = find_vma(current->mm, start);
73         if (!vma || vma->vm_start > start)
74                 return -ENOMEM;
75
76         for (nstart = start ; ; ) {
77                 unsigned int newflags;
78
79                 /* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
80
81                 newflags = vma->vm_flags | VM_LOCKED;
82                 if (!on)
83                         newflags &= ~VM_LOCKED;
84
85                 if (vma->vm_end >= end) {
86                         error = mlock_fixup(vma, nstart, end, newflags);
87                         break;
88                 }
89
90                 tmp = vma->vm_end;
91                 next = vma->vm_next;
92                 error = mlock_fixup(vma, nstart, tmp, newflags);
93                 if (error)
94                         break;
95                 nstart = tmp;
96                 vma = next;
97                 if (!vma || vma->vm_start != nstart) {
98                         error = -ENOMEM;
99                         break;
100                 }
101         }
102         return error;
103 }
104
105 asmlinkage long sys_mlock(unsigned long start, size_t len)
106 {
107         unsigned long locked, grow;
108         unsigned long lock_limit;
109         int error = -ENOMEM;
110
111         down_write(&current->mm->mmap_sem);
112         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
113         start &= PAGE_MASK;
114
115         grow = len >> PAGE_SHIFT;
116         if (!vx_vmlocked_avail(current->mm, grow))
117                 goto out;
118         locked = current->mm->locked_vm + grow;
119
120         lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
121         lock_limit >>= PAGE_SHIFT;
122
123         /* check against resource limits */
124         if (locked <= lock_limit)
125                 error = do_mlock(start, len, 1);
126 out:
127         up_write(&current->mm->mmap_sem);
128         return error;
129 }
130
131 asmlinkage long sys_munlock(unsigned long start, size_t len)
132 {
133         int ret;
134
135         down_write(&current->mm->mmap_sem);
136         len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
137         start &= PAGE_MASK;
138         ret = do_mlock(start, len, 0);
139         up_write(&current->mm->mmap_sem);
140         return ret;
141 }
142
143 static int do_mlockall(int flags)
144 {
145         int error;
146         unsigned int def_flags;
147         struct vm_area_struct * vma;
148
149         if (!capable(CAP_IPC_LOCK))
150                 return -EPERM;
151
152         def_flags = 0;
153         if (flags & MCL_FUTURE)
154                 def_flags = VM_LOCKED;
155         current->mm->def_flags = def_flags;
156
157         error = 0;
158         for (vma = current->mm->mmap; vma ; vma = vma->vm_next) {
159                 unsigned int newflags;
160
161                 newflags = vma->vm_flags | VM_LOCKED;
162                 if (!(flags & MCL_CURRENT))
163                         newflags &= ~VM_LOCKED;
164
165                 /* Ignore errors */
166                 mlock_fixup(vma, vma->vm_start, vma->vm_end, newflags);
167         }
168         return error;
169 }
170
171 asmlinkage long sys_mlockall(int flags)
172 {
173         unsigned long lock_limit;
174         int ret = -EINVAL;
175
176         down_write(&current->mm->mmap_sem);
177         if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE)))
178                 goto out;
179
180         lock_limit = current->rlim[RLIMIT_MEMLOCK].rlim_cur;
181         lock_limit >>= PAGE_SHIFT;
182
183         ret = -ENOMEM;
184         if (!vx_vmlocked_avail(current->mm, current->mm->total_vm))
185                 goto out;
186         /* check vserver lock limits? */
187         if ((current->mm->total_vm <= lock_limit) || capable(CAP_IPC_LOCK))
188                 ret = do_mlockall(flags);
189 out:
190         up_write(&current->mm->mmap_sem);
191         return ret;
192 }
193
194 asmlinkage long sys_munlockall(void)
195 {
196         int ret;
197
198         down_write(&current->mm->mmap_sem);
199         ret = do_mlockall(0);
200         up_write(&current->mm->mmap_sem);
201         return ret;
202 }