Backported the vs2.1.x fix for IRQ handling, whose bug had caused incorrect scheduler behavior
[linux-2.6.git] / kernel / vserver / dlimit.c
1 /*
2  *  linux/kernel/vserver/dlimit.c
3  *
4  *  Virtual Server: Context Disk Limits
5  *
6  *  Copyright (C) 2004-2005  Herbert Pötzl
7  *
8  *  V0.01  initial version
9  *  V0.02  compat32 splitup
10  *
11  */
12
13 #include <linux/fs.h>
14 #include <linux/namespace.h>
15 #include <linux/namei.h>
16 #include <linux/statfs.h>
17 #include <linux/compat.h>
18 #include <linux/vs_base.h>
19 #include <linux/vs_context.h>
20 #include <linux/vs_dlimit.h>
21 #include <linux/vserver/switch.h>
22 #include <linux/vserver/dlimit_cmd.h>
23
24 #include <asm/errno.h>
25 #include <asm/uaccess.h>
26
27 /*      __alloc_dl_info()
28
29         * allocate an initialized dl_info struct
30         * doesn't make it visible (hash)                        */
31
32 static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid)
33 {
34         struct dl_info *new = NULL;
35
36         vxdprintk(VXD_CBIT(dlim, 5),
37                 "alloc_dl_info(%p,%d)*", sb, xid);
38
39         /* would this benefit from a slab cache? */
40         new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
41         if (!new)
42                 return 0;
43
44         memset (new, 0, sizeof(struct dl_info));
45         new->dl_xid = xid;
46         new->dl_sb = sb;
47         INIT_RCU_HEAD(&new->dl_rcu);
48         INIT_HLIST_NODE(&new->dl_hlist);
49         spin_lock_init(&new->dl_lock);
50         atomic_set(&new->dl_refcnt, 0);
51         atomic_set(&new->dl_usecnt, 0);
52
53         /* rest of init goes here */
54
55         vxdprintk(VXD_CBIT(dlim, 4),
56                 "alloc_dl_info(%p,%d) = %p", sb, xid, new);
57         return new;
58 }
59
60 /*      __dealloc_dl_info()
61
62         * final disposal of dl_info                             */
63
64 static void __dealloc_dl_info(struct dl_info *dli)
65 {
66         vxdprintk(VXD_CBIT(dlim, 4),
67                 "dealloc_dl_info(%p)", dli);
68
69         dli->dl_hlist.next = LIST_POISON1;
70         dli->dl_xid = -1;
71         dli->dl_sb = 0;
72
73         BUG_ON(atomic_read(&dli->dl_usecnt));
74         BUG_ON(atomic_read(&dli->dl_refcnt));
75
76         kfree(dli);
77 }
78
79
80 /*      hash table for dl_info hash */
81
82 #define DL_HASH_SIZE    13
83
84 struct hlist_head dl_info_hash[DL_HASH_SIZE];
85
86 static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;
87
88
89 static inline unsigned int __hashval(struct super_block *sb, xid_t xid)
90 {
91         return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE);
92 }
93
94
95
96 /*      __hash_dl_info()
97
98         * add the dli to the global hash table
99         * requires the hash_lock to be held                     */
100
101 static inline void __hash_dl_info(struct dl_info *dli)
102 {
103         struct hlist_head *head;
104
105         vxdprintk(VXD_CBIT(dlim, 6),
106                 "__hash_dl_info: %p[#%d]", dli, dli->dl_xid);
107         get_dl_info(dli);
108         head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)];
109         hlist_add_head_rcu(&dli->dl_hlist, head);
110 }
111
112 /*      __unhash_dl_info()
113
114         * remove the dli from the global hash table
115         * requires the hash_lock to be held                     */
116
117 static inline void __unhash_dl_info(struct dl_info *dli)
118 {
119         vxdprintk(VXD_CBIT(dlim, 6),
120                 "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid);
121         hlist_del_rcu(&dli->dl_hlist);
122         put_dl_info(dli);
123 }
124
125
126 /*      __lookup_dl_info()
127
128         * requires the rcu_read_lock()
129         * doesn't increment the dl_refcnt                       */
130
131 static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid)
132 {
133         struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)];
134         struct hlist_node *pos;
135         struct dl_info *dli;
136
137         hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
138
139                 if (dli->dl_xid == xid && dli->dl_sb == sb) {
140                         return dli;
141                 }
142         }
143         return NULL;
144 }
145
146
147 struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid)
148 {
149         struct dl_info *dli;
150
151         rcu_read_lock();
152         dli = get_dl_info(__lookup_dl_info(sb, xid));
153         vxdprintk(VXD_CBIT(dlim, 7),
154                 "locate_dl_info(%p,#%d) = %p", sb, xid, dli);
155         rcu_read_unlock();
156         return dli;
157 }
158
159 void rcu_free_dl_info(struct rcu_head *head)
160 {
161         struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
162         int usecnt, refcnt;
163
164         BUG_ON(!dli || !head);
165
166         usecnt = atomic_read(&dli->dl_usecnt);
167         BUG_ON(usecnt < 0);
168
169         refcnt = atomic_read(&dli->dl_refcnt);
170         BUG_ON(refcnt < 0);
171
172         vxdprintk(VXD_CBIT(dlim, 3),
173                 "rcu_free_dl_info(%p)", dli);
174         if (!usecnt)
175                 __dealloc_dl_info(dli);
176         else
177                 printk("!!! rcu didn't free\n");
178 }
179
180
181
182
183 static int do_addrem_dlimit(uint32_t id, const char __user *name,
184         uint32_t flags, int add)
185 {
186         struct nameidata nd;
187         int ret;
188
189         ret = user_path_walk_link(name, &nd);
190         if (!ret) {
191                 struct super_block *sb;
192                 struct dl_info *dli;
193
194                 ret = -EINVAL;
195                 if (!nd.dentry->d_inode)
196                         goto out_release;
197                 if (!(sb = nd.dentry->d_inode->i_sb))
198                         goto out_release;
199
200                 if (add) {
201                         dli = __alloc_dl_info(sb, id);
202                         spin_lock(&dl_info_hash_lock);
203
204                         ret = -EEXIST;
205                         if (__lookup_dl_info(sb, id))
206                                 goto out_unlock;
207                         __hash_dl_info(dli);
208                         dli = NULL;
209                 } else {
210                         spin_lock(&dl_info_hash_lock);
211                         dli = __lookup_dl_info(sb, id);
212
213                         ret = -ESRCH;
214                         if (!dli)
215                                 goto out_unlock;
216                         __unhash_dl_info(dli);
217                 }
218                 ret = 0;
219         out_unlock:
220                 spin_unlock(&dl_info_hash_lock);
221                 if (add && dli)
222                         __dealloc_dl_info(dli);
223         out_release:
224                 path_release(&nd);
225         }
226         return ret;
227 }
228
229 int vc_add_dlimit(uint32_t id, void __user *data)
230 {
231         struct vcmd_ctx_dlimit_base_v0 vc_data;
232
233         if (!vx_check(0, VX_ADMIN))
234                 return -ENOSYS;
235         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
236                 return -EFAULT;
237
238         return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
239 }
240
241 int vc_rem_dlimit(uint32_t id, void __user *data)
242 {
243         struct vcmd_ctx_dlimit_base_v0 vc_data;
244
245         if (!vx_check(0, VX_ADMIN))
246                 return -ENOSYS;
247         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
248                 return -EFAULT;
249
250         return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
251 }
252
253 #ifdef  CONFIG_COMPAT
254
255 int vc_add_dlimit_x32(uint32_t id, void __user *data)
256 {
257         struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
258
259         if (!vx_check(0, VX_ADMIN))
260                 return -ENOSYS;
261         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
262                 return -EFAULT;
263
264         return do_addrem_dlimit(id,
265                 compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
266 }
267
268 int vc_rem_dlimit_x32(uint32_t id, void __user *data)
269 {
270         struct vcmd_ctx_dlimit_base_v0_x32 vc_data;
271
272         if (!vx_check(0, VX_ADMIN))
273                 return -ENOSYS;
274         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
275                 return -EFAULT;
276
277         return do_addrem_dlimit(id,
278                 compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
279 }
280
281 #endif  /* CONFIG_COMPAT */
282
283
284 static inline
285 int do_set_dlimit(uint32_t id, const char __user *name,
286         uint32_t space_used, uint32_t space_total,
287         uint32_t inodes_used, uint32_t inodes_total,
288         uint32_t reserved, uint32_t flags)
289 {
290         struct nameidata nd;
291         int ret;
292
293         ret = user_path_walk_link(name, &nd);
294         if (!ret) {
295                 struct super_block *sb;
296                 struct dl_info *dli;
297
298                 ret = -EINVAL;
299                 if (!nd.dentry->d_inode)
300                         goto out_release;
301                 if (!(sb = nd.dentry->d_inode->i_sb))
302                         goto out_release;
303                 if ((reserved != (uint32_t)CDLIM_KEEP &&
304                         reserved > 100) ||
305                         (inodes_used != (uint32_t)CDLIM_KEEP &&
306                         inodes_used > inodes_total) ||
307                         (space_used != (uint32_t)CDLIM_KEEP &&
308                         space_used > space_total))
309                         goto out_release;
310
311                 ret = -ESRCH;
312                 dli = locate_dl_info(sb, id);
313                 if (!dli)
314                         goto out_release;
315
316                 spin_lock(&dli->dl_lock);
317
318                 if (inodes_used != (uint32_t)CDLIM_KEEP)
319                         dli->dl_inodes_used = inodes_used;
320                 if (inodes_total != (uint32_t)CDLIM_KEEP)
321                         dli->dl_inodes_total = inodes_total;
322                 if (space_used != (uint32_t)CDLIM_KEEP) {
323                         dli->dl_space_used = space_used;
324                         dli->dl_space_used <<= 10;
325                 }
326                 if (space_total == (uint32_t)CDLIM_INFINITY)
327                         dli->dl_space_total = (uint64_t)CDLIM_INFINITY;
328                 else if (space_total != (uint32_t)CDLIM_KEEP) {
329                         dli->dl_space_total = space_total;
330                         dli->dl_space_total <<= 10;
331                 }
332                 if (reserved != (uint32_t)CDLIM_KEEP)
333                         dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;
334
335                 spin_unlock(&dli->dl_lock);
336
337                 put_dl_info(dli);
338                 ret = 0;
339
340         out_release:
341                 path_release(&nd);
342         }
343         return ret;
344 }
345
346 int vc_set_dlimit(uint32_t id, void __user *data)
347 {
348         struct vcmd_ctx_dlimit_v0 vc_data;
349
350         if (!vx_check(0, VX_ADMIN))
351                 return -ENOSYS;
352         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
353                 return -EFAULT;
354
355         return do_set_dlimit(id, vc_data.name,
356                 vc_data.space_used, vc_data.space_total,
357                 vc_data.inodes_used, vc_data.inodes_total,
358                 vc_data.reserved, vc_data.flags);
359 }
360
361 #ifdef  CONFIG_COMPAT
362
363 int vc_set_dlimit_x32(uint32_t id, void __user *data)
364 {
365         struct vcmd_ctx_dlimit_v0_x32 vc_data;
366
367         if (!vx_check(0, VX_ADMIN))
368                 return -ENOSYS;
369         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
370                 return -EFAULT;
371
372         return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
373                 vc_data.space_used, vc_data.space_total,
374                 vc_data.inodes_used, vc_data.inodes_total,
375                 vc_data.reserved, vc_data.flags);
376 }
377
378 #endif  /* CONFIG_COMPAT */
379
380
381 static inline
382 int do_get_dlimit(uint32_t id, const char __user *name,
383         uint32_t *space_used, uint32_t *space_total,
384         uint32_t *inodes_used, uint32_t *inodes_total,
385         uint32_t *reserved, uint32_t *flags)
386 {
387         struct nameidata nd;
388         int ret;
389
390         ret = user_path_walk_link(name, &nd);
391         if (!ret) {
392                 struct super_block *sb;
393                 struct dl_info *dli;
394
395                 ret = -EINVAL;
396                 if (!nd.dentry->d_inode)
397                         goto out_release;
398                 if (!(sb = nd.dentry->d_inode->i_sb))
399                         goto out_release;
400
401                 ret = -ESRCH;
402                 dli = locate_dl_info(sb, id);
403                 if (!dli)
404                         goto out_release;
405
406                 spin_lock(&dli->dl_lock);
407                 *inodes_used = dli->dl_inodes_used;
408                 *inodes_total = dli->dl_inodes_total;
409                 *space_used = dli->dl_space_used >> 10;
410                 if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
411                         *space_total = (uint32_t)CDLIM_INFINITY;
412                 else
413                         *space_total = dli->dl_space_total >> 10;
414
415                 *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
416                 spin_unlock(&dli->dl_lock);
417
418                 put_dl_info(dli);
419                 ret = -EFAULT;
420
421                 ret = 0;
422         out_release:
423                 path_release(&nd);
424         }
425         return ret;
426 }
427
428
429 int vc_get_dlimit(uint32_t id, void __user *data)
430 {
431         struct vcmd_ctx_dlimit_v0 vc_data;
432         int ret;
433
434         if (!vx_check(0, VX_ADMIN))
435                 return -ENOSYS;
436         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
437                 return -EFAULT;
438
439         ret = do_get_dlimit(id, vc_data.name,
440                 &vc_data.space_used, &vc_data.space_total,
441                 &vc_data.inodes_used, &vc_data.inodes_total,
442                 &vc_data.reserved, &vc_data.flags);
443         if (ret)
444                 return ret;
445
446         if (copy_to_user(data, &vc_data, sizeof(vc_data)))
447                 return -EFAULT;
448         return 0;
449 }
450
451 #ifdef  CONFIG_COMPAT
452
453 int vc_get_dlimit_x32(uint32_t id, void __user *data)
454 {
455         struct vcmd_ctx_dlimit_v0_x32 vc_data;
456         int ret;
457
458         if (!vx_check(0, VX_ADMIN))
459                 return -ENOSYS;
460         if (copy_from_user (&vc_data, data, sizeof(vc_data)))
461                 return -EFAULT;
462
463         ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
464                 &vc_data.space_used, &vc_data.space_total,
465                 &vc_data.inodes_used, &vc_data.inodes_total,
466                 &vc_data.reserved, &vc_data.flags);
467         if (ret)
468                 return ret;
469
470         if (copy_to_user(data, &vc_data, sizeof(vc_data)))
471                 return -EFAULT;
472         return 0;
473 }
474
475 #endif  /* CONFIG_COMPAT */
476
477
478 void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
479 {
480         struct dl_info *dli;
481         __u64 blimit, bfree, bavail;
482         __u32 ifree;
483
484         dli = locate_dl_info(sb, vx_current_xid());
485         if (!dli)
486                 return;
487
488         spin_lock(&dli->dl_lock);
489         if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY)
490                 goto no_ilim;
491
492         /* reduce max inodes available to limit */
493         if (buf->f_files > dli->dl_inodes_total)
494                 buf->f_files = dli->dl_inodes_total;
495
496         /* inode hack for reiserfs */
497         if ((buf->f_files == 0) && (dli->dl_inodes_total > 0)) {
498                 buf->f_files = dli->dl_inodes_total;
499                 buf->f_ffree = dli->dl_inodes_total;
500         }
501
502         ifree = dli->dl_inodes_total - dli->dl_inodes_used;
503         /* reduce free inodes to min */
504         if (ifree < buf->f_ffree)
505                 buf->f_ffree = ifree;
506
507 no_ilim:
508         if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
509                 goto no_blim;
510
511         blimit = dli->dl_space_total >> sb->s_blocksize_bits;
512
513         if (dli->dl_space_total < dli->dl_space_used)
514                 bfree = 0;
515         else
516                 bfree = (dli->dl_space_total - dli->dl_space_used)
517                         >> sb->s_blocksize_bits;
518
519         bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
520         if (bavail < dli->dl_space_used)
521                 bavail = 0;
522         else
523                 bavail = (bavail - dli->dl_space_used)
524                         >> sb->s_blocksize_bits;
525
526         /* reduce max space available to limit */
527         if (buf->f_blocks > blimit)
528                 buf->f_blocks = blimit;
529
530         /* reduce free space to min */
531         if (bfree < buf->f_bfree)
532                 buf->f_bfree = bfree;
533
534         /* reduce avail space to min */
535         if (bavail < buf->f_bavail)
536                 buf->f_bavail = bavail;
537
538 no_blim:
539         spin_unlock(&dli->dl_lock);
540         put_dl_info(dli);
541
542         return;
543 }
544
545 #include <linux/module.h>
546
547 EXPORT_SYMBOL_GPL(locate_dl_info);
548 EXPORT_SYMBOL_GPL(rcu_free_dl_info);
549