/*
 *  linux/kernel/vserver/dlimit.c
 *
 *  Virtual Server: Context Disk Limits
 *
 *  Copyright (C) 2004-2005  Herbert Pötzl
 *
 *  V0.01  initial version
 *  V0.02  compat32 splitup
 *
 */

#include <linux/fs.h>
#include <linux/namespace.h>
#include <linux/namei.h>
#include <linux/statfs.h>
#include <linux/compat.h>
#include <linux/vserver/switch.h>
#include <linux/vs_context.h>
#include <linux/vs_dlimit.h>
#include <linux/vserver/dlimit_cmd.h>

#include <asm/errno.h>
#include <asm/uaccess.h>

/*      __alloc_dl_info()

        * allocate an initialized dl_info struct
        * doesn't make it visible (hash)                        */

static struct dl_info *__alloc_dl_info(struct super_block *sb, xid_t xid)
{
        struct dl_info *new = NULL;

        vxdprintk(VXD_CBIT(dlim, 5),
                "alloc_dl_info(%p,%d)*", sb, xid);

        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct dl_info), GFP_KERNEL);
        if (!new)
                return NULL;

        memset(new, 0, sizeof(struct dl_info));
        new->dl_xid = xid;
        new->dl_sb = sb;
        INIT_RCU_HEAD(&new->dl_rcu);
        INIT_HLIST_NODE(&new->dl_hlist);
        spin_lock_init(&new->dl_lock);
        atomic_set(&new->dl_refcnt, 0);
        atomic_set(&new->dl_usecnt, 0);

        /* rest of init goes here */

        vxdprintk(VXD_CBIT(dlim, 4),
                "alloc_dl_info(%p,%d) = %p", sb, xid, new);
        return new;
}
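
/* note on the two counters initialized above: dl_refcnt is taken
 * whenever the entry is hashed or handed out by locate_dl_info(),
 * while dl_usecnt appears to track active users elsewhere in the
 * dlimit code and must have dropped to zero before
 * __dealloc_dl_info() runs (see the BUG_ONs below)               */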

/*      __dealloc_dl_info()

        * final disposal of dl_info                             */

static void __dealloc_dl_info(struct dl_info *dli)
{
        vxdprintk(VXD_CBIT(dlim, 4),
                "dealloc_dl_info(%p)", dli);

        dli->dl_hlist.next = LIST_POISON1;
        dli->dl_xid = -1;
        dli->dl_sb = NULL;

        BUG_ON(atomic_read(&dli->dl_usecnt));
        BUG_ON(atomic_read(&dli->dl_refcnt));

        kfree(dli);
}


/*      hash table for dl_info hash */

#define DL_HASH_SIZE    13

struct hlist_head dl_info_hash[DL_HASH_SIZE];

static spinlock_t dl_info_hash_lock = SPIN_LOCK_UNLOCKED;


static inline unsigned int __hashval(struct super_block *sb, xid_t xid)
{
        return ((xid ^ (unsigned long)sb) % DL_HASH_SIZE);
}
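
/* the hash key XORs the context id with the superblock pointer and
 * reduces it modulo DL_HASH_SIZE (13, a small prime), so nearby xids
 * on the same filesystem usually end up in different buckets        */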


/*      __hash_dl_info()

        * add the dli to the global hash table
        * requires the hash_lock to be held                     */

static inline void __hash_dl_info(struct dl_info *dli)
{
        struct hlist_head *head;

        vxdprintk(VXD_CBIT(dlim, 6),
                "__hash_dl_info: %p[#%d]", dli, dli->dl_xid);
        get_dl_info(dli);
        head = &dl_info_hash[__hashval(dli->dl_sb, dli->dl_xid)];
        hlist_add_head_rcu(&dli->dl_hlist, head);
}

/*      __unhash_dl_info()

        * remove the dli from the global hash table
        * requires the hash_lock to be held                     */

static inline void __unhash_dl_info(struct dl_info *dli)
{
        vxdprintk(VXD_CBIT(dlim, 6),
                "__unhash_dl_info: %p[#%d]", dli, dli->dl_xid);
        hlist_del_rcu(&dli->dl_hlist);
        put_dl_info(dli);
}


/*      __lookup_dl_info()

        * requires the rcu_read_lock()
        * doesn't increment the dl_refcnt                       */

static inline struct dl_info *__lookup_dl_info(struct super_block *sb, xid_t xid)
{
        struct hlist_head *head = &dl_info_hash[__hashval(sb, xid)];
        struct hlist_node *pos;
        struct dl_info *dli;

        hlist_for_each_entry_rcu(dli, pos, head, dl_hlist) {
                if (dli->dl_xid == xid && dli->dl_sb == sb) {
                        return dli;
                }
        }
        return NULL;
}


struct dl_info *locate_dl_info(struct super_block *sb, xid_t xid)
{
        struct dl_info *dli;

        rcu_read_lock();
        dli = get_dl_info(__lookup_dl_info(sb, xid));
        vxdprintk(VXD_CBIT(dlim, 7),
                "locate_dl_info(%p,#%d) = %p", sb, xid, dli);
        rcu_read_unlock();
        return dli;
}

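/*      rcu_free_dl_info()

        * RCU callback: frees the dl_info after the grace period,
        * but only if no user is holding it any more             */
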
void rcu_free_dl_info(struct rcu_head *head)
{
        struct dl_info *dli = container_of(head, struct dl_info, dl_rcu);
        int usecnt, refcnt;

        BUG_ON(!dli || !head);

        usecnt = atomic_read(&dli->dl_usecnt);
        BUG_ON(usecnt < 0);

        refcnt = atomic_read(&dli->dl_refcnt);
        BUG_ON(refcnt < 0);

        vxdprintk(VXD_CBIT(dlim, 3),
                "rcu_free_dl_info(%p)", dli);
        if (!usecnt)
                __dealloc_dl_info(dli);
        else
                printk("!!! rcu didn't free\n");
}


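/*      do_addrem_dlimit()

        * shared back-end for the add/remove commands: resolves the
        * user supplied path to a superblock, then either hashes a
        * freshly allocated dl_info for (sb, id) or unhashes and
        * releases the existing one; the flags argument is accepted
        * but currently unused                                      */
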
static int do_addrem_dlimit(uint32_t id, const char __user *name,
        uint32_t flags, int add)
{
        struct nameidata nd;
        int ret;

        ret = user_path_walk_link(name, &nd);
        if (!ret) {
                struct super_block *sb;
                struct dl_info *dli;

                ret = -EINVAL;
                if (!nd.dentry->d_inode)
                        goto out_release;
                if (!(sb = nd.dentry->d_inode->i_sb))
                        goto out_release;

                if (add) {
                        dli = __alloc_dl_info(sb, id);

                        /* bail out before taking the hash lock
                           if the allocation failed */
                        ret = -ENOMEM;
                        if (!dli)
                                goto out_release;

                        spin_lock(&dl_info_hash_lock);

                        ret = -EEXIST;
                        if (__lookup_dl_info(sb, id))
                                goto out_unlock;
                        __hash_dl_info(dli);
                        dli = NULL;
                } else {
                        spin_lock(&dl_info_hash_lock);
                        dli = __lookup_dl_info(sb, id);

                        ret = -ESRCH;
                        if (!dli)
                                goto out_unlock;
                        __unhash_dl_info(dli);
                }
                ret = 0;
        out_unlock:
                spin_unlock(&dl_info_hash_lock);
                if (add && dli)
                        __dealloc_dl_info(dli);
        out_release:
                path_release(&nd);
        }
        return ret;
}

int vc_add_dlimit(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_base_v0 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 1);
}

int vc_rem_dlimit(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_base_v0 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_addrem_dlimit(id, vc_data.name, vc_data.flags, 0);
}

#ifdef  CONFIG_COMPAT

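/*      32-bit compat entry points

        * identical to the native handlers above, except that the
        * user struct stores the path name as a 32-bit pointer,
        * which is widened with compat_ptr() before use           */
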
int vc_add_dlimit_x32(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_base_v0_x32 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_addrem_dlimit(id,
                compat_ptr(vc_data.name_ptr), vc_data.flags, 1);
}

int vc_rem_dlimit_x32(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_base_v0_x32 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_addrem_dlimit(id,
                compat_ptr(vc_data.name_ptr), vc_data.flags, 0);
}

#endif  /* CONFIG_COMPAT */


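/*      do_set_dlimit()

        * applies new limit values to an existing dl_info:
        *   - space_used/space_total arrive in KiB and are stored
        *     internally in bytes (hence the << 10)
        *   - CDLIM_KEEP leaves the current value untouched,
        *     CDLIM_INFINITY disables the space limit
        *   - reserved is the percentage withheld when computing the
        *     space reported as available; it is folded into
        *     dl_nrlmult, a 1024-based fixed point multiplier,
        *     e.g. reserved = 5 gives
        *     dl_nrlmult = 1024 * 95 / 100 = 972                    */
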
static inline
int do_set_dlimit(uint32_t id, const char __user *name,
        uint32_t space_used, uint32_t space_total,
        uint32_t inodes_used, uint32_t inodes_total,
        uint32_t reserved, uint32_t flags)
{
        struct nameidata nd;
        int ret;

        ret = user_path_walk_link(name, &nd);
        if (!ret) {
                struct super_block *sb;
                struct dl_info *dli;

                ret = -EINVAL;
                if (!nd.dentry->d_inode)
                        goto out_release;
                if (!(sb = nd.dentry->d_inode->i_sb))
                        goto out_release;
                if ((reserved != (uint32_t)CDLIM_KEEP &&
                        reserved > 100) ||
                        (inodes_used != (uint32_t)CDLIM_KEEP &&
                        inodes_used > inodes_total) ||
                        (space_used != (uint32_t)CDLIM_KEEP &&
                        space_used > space_total))
                        goto out_release;

                ret = -ESRCH;
                dli = locate_dl_info(sb, id);
                if (!dli)
                        goto out_release;

                spin_lock(&dli->dl_lock);

                if (inodes_used != (uint32_t)CDLIM_KEEP)
                        dli->dl_inodes_used = inodes_used;
                if (inodes_total != (uint32_t)CDLIM_KEEP)
                        dli->dl_inodes_total = inodes_total;
                if (space_used != (uint32_t)CDLIM_KEEP) {
                        dli->dl_space_used = space_used;
                        dli->dl_space_used <<= 10;
                }
                if (space_total == (uint32_t)CDLIM_INFINITY)
                        dli->dl_space_total = (uint64_t)CDLIM_INFINITY;
                else if (space_total != (uint32_t)CDLIM_KEEP) {
                        dli->dl_space_total = space_total;
                        dli->dl_space_total <<= 10;
                }
                if (reserved != (uint32_t)CDLIM_KEEP)
                        dli->dl_nrlmult = (1 << 10) * (100 - reserved) / 100;

                spin_unlock(&dli->dl_lock);

                put_dl_info(dli);
                ret = 0;

        out_release:
                path_release(&nd);
        }
        return ret;
}

int vc_set_dlimit(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_v0 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_set_dlimit(id, vc_data.name,
                vc_data.space_used, vc_data.space_total,
                vc_data.inodes_used, vc_data.inodes_total,
                vc_data.reserved, vc_data.flags);
}

#ifdef  CONFIG_COMPAT

int vc_set_dlimit_x32(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_v0_x32 vc_data;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        return do_set_dlimit(id, compat_ptr(vc_data.name_ptr),
                vc_data.space_used, vc_data.space_total,
                vc_data.inodes_used, vc_data.inodes_total,
                vc_data.reserved, vc_data.flags);
}

#endif  /* CONFIG_COMPAT */


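/*      do_get_dlimit()

        * read back the current limits for (path, id): space values
        * are converted from bytes back to KiB (>> 10) and the
        * reserved percentage is recovered from dl_nrlmult, rounded
        * to the nearest percent                                    */
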
static inline
int do_get_dlimit(uint32_t id, const char __user *name,
        uint32_t *space_used, uint32_t *space_total,
        uint32_t *inodes_used, uint32_t *inodes_total,
        uint32_t *reserved, uint32_t *flags)
{
        struct nameidata nd;
        int ret;

        ret = user_path_walk_link(name, &nd);
        if (!ret) {
                struct super_block *sb;
                struct dl_info *dli;

                ret = -EINVAL;
                if (!nd.dentry->d_inode)
                        goto out_release;
                if (!(sb = nd.dentry->d_inode->i_sb))
                        goto out_release;

                ret = -ESRCH;
                dli = locate_dl_info(sb, id);
                if (!dli)
                        goto out_release;

                spin_lock(&dli->dl_lock);
                *inodes_used = dli->dl_inodes_used;
                *inodes_total = dli->dl_inodes_total;
                *space_used = dli->dl_space_used >> 10;
                if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
                        *space_total = (uint32_t)CDLIM_INFINITY;
                else
                        *space_total = dli->dl_space_total >> 10;

                *reserved = 100 - ((dli->dl_nrlmult * 100 + 512) >> 10);
                spin_unlock(&dli->dl_lock);

                put_dl_info(dli);
                ret = 0;
        out_release:
                path_release(&nd);
        }
        return ret;
}


int vc_get_dlimit(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_v0 vc_data;
        int ret;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        ret = do_get_dlimit(id, vc_data.name,
                &vc_data.space_used, &vc_data.space_total,
                &vc_data.inodes_used, &vc_data.inodes_total,
                &vc_data.reserved, &vc_data.flags);
        if (ret)
                return ret;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

#ifdef  CONFIG_COMPAT

int vc_get_dlimit_x32(uint32_t id, void __user *data)
{
        struct vcmd_ctx_dlimit_v0_x32 vc_data;
        int ret;

        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
        if (copy_from_user(&vc_data, data, sizeof(vc_data)))
                return -EFAULT;

        ret = do_get_dlimit(id, compat_ptr(vc_data.name_ptr),
                &vc_data.space_used, &vc_data.space_total,
                &vc_data.inodes_used, &vc_data.inodes_total,
                &vc_data.reserved, &vc_data.flags);
        if (ret)
                return ret;

        if (copy_to_user(data, &vc_data, sizeof(vc_data)))
                return -EFAULT;
        return 0;
}

#endif  /* CONFIG_COMPAT */


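/*      vx_vsi_statfs()

        * clamp the statfs values reported to the current context to
        * its disk limits, so e.g. df inside a guest shows the limit
        * rather than the full filesystem; byte limits are converted
        * to blocks via the superblock's s_blocksize_bits            */
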
void vx_vsi_statfs(struct super_block *sb, struct kstatfs *buf)
{
        struct dl_info *dli;
        __u64 blimit, bfree, bavail;
        __u32 ifree;

        dli = locate_dl_info(sb, vx_current_xid());
        if (!dli)
                return;

        spin_lock(&dli->dl_lock);
        if (dli->dl_inodes_total == (uint32_t)CDLIM_INFINITY)
                goto no_ilim;

        /* reduce max inodes available to limit */
        if (buf->f_files > dli->dl_inodes_total)
                buf->f_files = dli->dl_inodes_total;

        /* inode hack for reiserfs */
        if ((buf->f_files == 0) && (dli->dl_inodes_total > 0)) {
                buf->f_files = dli->dl_inodes_total;
                buf->f_ffree = dli->dl_inodes_total;
        }

        ifree = dli->dl_inodes_total - dli->dl_inodes_used;
        /* reduce free inodes to min */
        if (ifree < buf->f_ffree)
                buf->f_ffree = ifree;

no_ilim:
        if (dli->dl_space_total == (uint64_t)CDLIM_INFINITY)
                goto no_blim;

        blimit = dli->dl_space_total >> sb->s_blocksize_bits;

        if (dli->dl_space_total < dli->dl_space_used)
                bfree = 0;
        else
                bfree = (dli->dl_space_total - dli->dl_space_used)
                        >> sb->s_blocksize_bits;

        bavail = ((dli->dl_space_total >> 10) * dli->dl_nrlmult);
        if (bavail < dli->dl_space_used)
                bavail = 0;
        else
                bavail = (bavail - dli->dl_space_used)
                        >> sb->s_blocksize_bits;

        /* reduce max space available to limit */
        if (buf->f_blocks > blimit)
                buf->f_blocks = blimit;

        /* reduce free space to min */
        if (bfree < buf->f_bfree)
                buf->f_bfree = bfree;

        /* reduce avail space to min */
        if (bavail < buf->f_bavail)
                buf->f_bavail = bavail;

no_blim:
        spin_unlock(&dli->dl_lock);
        put_dl_info(dli);

        return;
}

#include <linux/module.h>

EXPORT_SYMBOL_GPL(locate_dl_info);
EXPORT_SYMBOL_GPL(rcu_free_dl_info);