2 * linux/kernel/vserver/context.c
4 * Virtual Server: Context Support
6 * Copyright (C) 2003-2004 Herbert Pötzl
9 * V0.02 vx_ctx_kill syscall command
10 * V0.03 replaced context_info calls
11 * V0.04 redesign of struct (de)alloc
12 * V0.05 rlimit basic implementation
13 * V0.06 task_xid and info commands
14 * V0.07 context flags and caps
15 * V0.08 switch to RCU based hash
19 #include <linux/config.h>
20 #include <linux/slab.h>
21 #include <linux/vserver.h>
22 #include <linux/vserver/legacy.h>
23 #include <linux/vs_base.h>
24 #include <linux/vs_context.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/namespace.h>
27 #include <linux/rcupdate.h>
29 #include <asm/errno.h>
34 * allocate an initialized vx_info struct
35 * doesn't make it visible (hash) */
/* __alloc_vx_info() -- allocate and initialize a vx_info struct for @xid.
 * The new context is NOT hashed here, so it is not yet visible to lookups.
 * NOTE(review): the kmalloc NULL check and the return are in elided lines. */
37 static struct vx_info *__alloc_vx_info(xid_t xid)
39 struct vx_info *new = NULL;
41 vxdprintk("alloc_vx_info(%d)\n", xid);
43 /* would this benefit from a slab cache? */
44 new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
/* zero the whole struct before wiring up individual fields */
48 memset (new, 0, sizeof(struct vx_info));
50 INIT_RCU_HEAD(&new->vx_rcu);
51 INIT_HLIST_NODE(&new->vx_hlist);
/* both counters start at zero; the caller takes the first reference */
52 atomic_set(&new->vx_refcnt, 0);
53 atomic_set(&new->vx_usecnt, 0);
55 /* rest of init goes here */
56 vx_info_init_limit(&new->limit);
57 vx_info_init_sched(&new->sched);
58 vx_info_init_cvirt(&new->cvirt);
59 vx_info_init_cacct(&new->cacct);
/* new contexts start in SETUP|INIT state until configured via syscalls */
61 new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
/* bounding capabilities default to the init effective set */
62 new->vx_bcaps = CAP_INIT_EFF_SET;
65 vxdprintk("alloc_vx_info(%d) = %p\n", xid, new);
69 /* __dealloc_vx_info()
71 * final disposal of vx_info */
/* __dealloc_vx_info() -- final disposal of @vxi once it is unreferenced:
 * poisons the hash node, drops held namespace/fs references, tears down
 * the per-context subsystems.  (The kfree is in an elided line.) */
73 static void __dealloc_vx_info(struct vx_info *vxi)
75 vxdprintk("dealloc_vx_info(%p)\n", vxi);
/* poison the hlist link to catch use-after-free of the hash node */
77 vxi->vx_hlist.next = LIST_POISON1;
/* release the namespace reference, if one was assigned */
80 if (vxi->vx_namespace)
81 put_namespace(vxi->vx_namespace);
/* NOTE(review): vx_fs is put unconditionally, unlike vx_namespace --
 * presumably always set by the (elided) assignment path; verify */
83 put_fs_struct(vxi->vx_fs);
85 vx_info_exit_limit(&vxi->limit);
86 vx_info_exit_sched(&vxi->sched);
87 vx_info_exit_cvirt(&vxi->cvirt);
88 vx_info_exit_cacct(&vxi->cacct);
/* both counters must be zero before disposal -- anything else is a bug */
90 BUG_ON(atomic_read(&vxi->vx_usecnt));
91 BUG_ON(atomic_read(&vxi->vx_refcnt));
97 /* hash table for vx_info hash */
/* fixed-size hash of all live contexts, keyed by xid */
99 #define VX_HASH_SIZE 13
101 struct hlist_head vx_info_hash[VX_HASH_SIZE];
/* serializes hash writers; readers traverse under rcu_read_lock() */
103 static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* map an xid onto its bucket index in vx_info_hash */
106 static inline unsigned int __hashval(xid_t xid)
108 return (xid % VX_HASH_SIZE);
115 * add the vxi to the global hash table
116 * requires the hash_lock to be held */
/* __hash_vx_info() -- insert @vxi into the global hash table;
 * caller must hold vx_info_hash_lock */
118 static inline void __hash_vx_info(struct vx_info *vxi)
120 struct hlist_head *head;
122 vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
124 head = &vx_info_hash[__hashval(vxi->vx_id)];
/* RCU variant so concurrent lockless readers see a consistent list */
125 hlist_add_head_rcu(&vxi->vx_hlist, head);
128 /* __unhash_vx_info()
130 * remove the vxi from the global hash table
131 * requires the hash_lock to be held */
/* __unhash_vx_info() -- remove @vxi from the global hash table;
 * caller must hold vx_info_hash_lock.  RCU delete: readers still
 * traversing may see the entry until a grace period elapses. */
133 static inline void __unhash_vx_info(struct vx_info *vxi)
135 vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
136 hlist_del_rcu(&vxi->vx_hlist);
141 /* __lookup_vx_info()
143 * requires the rcu_read_lock()
144 * doesn't increment the vx_refcnt */
/* __lookup_vx_info() -- find the context for @xid (NULL if not hashed);
 * caller must hold rcu_read_lock(); does NOT increment vx_refcnt */
146 static inline struct vx_info *__lookup_vx_info(xid_t xid)
148 struct hlist_head *head = &vx_info_hash[__hashval(xid)];
149 struct hlist_node *pos;
151 hlist_for_each_rcu(pos, head) {
152 struct vx_info *vxi =
153 hlist_entry(pos, struct vx_info, vx_hlist);
/* match on the context id; the found/return path is elided here */
155 if (vxi->vx_id == xid) {
165 * find unused dynamic xid
166 * requires the hash_lock to be held */
/* __vx_dynamic_id() -- find an unused dynamic xid, scanning upward from
 * the last value handed out; caller must hold vx_info_hash_lock.
 * NOTE(review): 'barrier' (the wrap-around sentinel that terminates the
 * scan after one full cycle) is declared in an elided line -- confirm. */
168 static inline xid_t __vx_dynamic_id(void)
/* persists across calls so ids are handed out round-robin */
170 static xid_t seq = MAX_S_CONTEXT;
174 if (++seq > MAX_S_CONTEXT)
176 if (!__lookup_vx_info(seq))
178 } while (barrier != seq);
184 * locate or create the requested context
185 * get() it and if new hash it */
/* __loc_vx_info() -- locate or create the context for @id and return it
 * with a reference held; a newly created context is hashed here.
 * @id == VX_DYNAMIC_ID allocates a fresh dynamic xid.
 * On failure *err is set and NULL is returned (return paths elided). */
187 static struct vx_info * __loc_vx_info(int id, int *err)
189 struct vx_info *new, *vxi = NULL;
191 vxdprintk("loc_vx_info(%d)\n", id);
/* preallocate outside the lock so kmalloc never runs under the spinlock */
193 if (!(new = __alloc_vx_info(id))) {
198 spin_lock(&vx_info_hash_lock);
200 /* dynamic context requested */
201 if (id == VX_DYNAMIC_ID) {
202 id = __vx_dynamic_id();
/* dynamic range exhausted (error branch partially elided) */
204 printk(KERN_ERR "no dynamic context available.\n");
209 /* existing context requested */
210 else if ((vxi = __lookup_vx_info(id))) {
211 /* context in setup is not available */
212 if (vxi->vx_flags & VXF_STATE_SETUP) {
213 vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi);
217 vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi);
224 /* new context requested */
225 vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new);
/* hash while holding the lock; get_vx_info() gives the caller its ref */
226 __hash_vx_info(get_vx_info(new));
/* hand ownership of 'new' to 'vxi' so the cleanup below skips it */
227 vxi = new, new = NULL;
231 spin_unlock(&vx_info_hash_lock);
/* dispose of the preallocation when it was not consumed (found/error) */
233 __dealloc_vx_info(new);
/* rcu_free_vx_info() -- RCU callback: dispose of a context after the
 * grace period, provided no use/ref counts remain */
243 void rcu_free_vx_info(struct rcu_head *head)
245 struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
248 BUG_ON(!vxi || !head);
250 usecnt = atomic_read(&vxi->vx_usecnt);
253 refcnt = atomic_read(&vxi->vx_refcnt);
/* freed only when both counters dropped (the guard branch is elided) */
257 __dealloc_vx_info(vxi);
/* diagnostic for the still-referenced case -- context leaks here */
259 printk("!!! rcu didn't free\n");
/* unhash_vx_info() -- remove @vxi from the global hash under the hash
 * lock, making it invisible to new lookups */
262 void unhash_vx_info(struct vx_info *vxi)
264 spin_lock(&vx_info_hash_lock);
265 __unhash_vx_info(vxi);
266 spin_unlock(&vx_info_hash_lock);
271 * search for a vx_info and get() it
272 * negative id means current */
/* locate_vx_info() -- look up a context and get() it;
 * negative @id means the current task's own context */
274 struct vx_info *locate_vx_info(int id)
279 vxi = get_vx_info(current->vx_info);
/* hash lookup path (rcu locking lines elided in this view) */
282 vxi = get_vx_info(__lookup_vx_info(id));
288 /* vx_info_is_hashed()
290 * verify that xid is still hashed */
/* vx_info_is_hashed() -- nonzero if @xid is (still) in the hash table */
292 int vx_info_is_hashed(xid_t xid)
297 hashed = (__lookup_vx_info(xid) != NULL);
302 #ifdef CONFIG_VSERVER_LEGACY
/* legacy API (CONFIG_VSERVER_LEGACY): allocate an unhashed context */
305 struct vx_info *alloc_vx_info(xid_t xid)
307 return __alloc_vx_info(xid);
/* legacy API: locate-or-create @id, discarding the error code */
311 struct vx_info *locate_or_create_vx_info(int id)
315 return __loc_vx_info(id, &err);
320 #ifdef CONFIG_PROC_FS
/* local hlist_for_each_rcu for the proc iteration below: prefetches the
 * next node and inserts a read barrier between iterations.
 * NOTE(review): this redefines the name used by __lookup_vx_info above --
 * presumably compensating for a header difference; verify no conflict. */
322 #define hlist_for_each_rcu(pos, head) \
323 for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
324 pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
/* get_xid_list() -- fill @xids with up to @size hashed xids for /proc,
 * starting at entry @index; the return (count stored) is elided here */
326 int get_xid_list(int index, unsigned int *xids, int size)
328 int hindex, nr_xids = 0;
/* walk every hash bucket; traversal is RCU-protected */
331 for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
332 struct hlist_head *head = &vx_info_hash[hindex];
333 struct hlist_node *pos;
335 hlist_for_each_rcu(pos, head) {
341 vxi = hlist_entry(pos, struct vx_info, vx_hlist);
342 xids[nr_xids] = vxi->vx_id;
/* stop once the caller's buffer is full */
343 if (++nr_xids >= size)
/* vx_migrate_user() -- switch task @p onto the per-context user_struct
 * for its uid within @vxi, transferring the process count */
353 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
355 struct user_struct *new_user, *old_user;
/* uid accounting is per-xid: look up / create the xid-local user */
359 new_user = alloc_uid(vxi->vx_id, p->uid);
/* move one process from the old accounting bucket to the new one */
364 if (new_user != old_user) {
365 atomic_inc(&new_user->processes);
366 atomic_dec(&old_user->processes);
/* vx_mask_bcaps() -- clamp all of @p's capability sets to the bounding
 * capabilities of its context */
373 void vx_mask_bcaps(struct task_struct *p)
375 struct vx_info *vxi = p->vx_info;
377 p->cap_effective &= vxi->vx_bcaps;
378 p->cap_inheritable &= vxi->vx_bcaps;
379 p->cap_permitted &= vxi->vx_bcaps;
383 #include <linux/file.h>
/* vx_nofiles_task() -- count @tsk's open-fd bits plus close-on-exec bits
 * (used for RLIMIT_NOFILE accounting); holds files->file_lock while
 * counting.  (Pointer increments / return are in elided lines.) */
385 static inline int vx_nofiles_task(struct task_struct *tsk)
387 struct files_struct *files = tsk->files;
388 const unsigned long *obptr, *cbptr;
391 spin_lock(&files->file_lock);
392 obptr = files->open_fds->fds_bits;
393 cbptr = files->close_on_exec->fds_bits;
/* number of longs covering the fd bitmap */
394 count = files->max_fds / (sizeof(unsigned long) * 8);
395 for (total = 0; count > 0; count--) {
/* popcount each bitmap word */
397 total += hweight_long(*obptr);
400 total += hweight_long(*cbptr);
403 spin_unlock(&files->file_lock);
/* vx_openfd_task() -- count @tsk's open file descriptors (used for
 * RLIMIT_OPENFD accounting); holds files->file_lock while counting */
407 static inline int vx_openfd_task(struct task_struct *tsk)
409 struct files_struct *files = tsk->files;
410 const unsigned long *bptr;
413 spin_lock(&files->file_lock);
414 bptr = files->open_fds->fds_bits;
/* number of longs covering the fd bitmap */
415 count = files->max_fds / (sizeof(unsigned long) * 8);
416 for (total = 0; count > 0; count--) {
418 total += hweight_long(*bptr);
421 spin_unlock(&files->file_lock);
426 * migrate task to new context
427 * gets vxi, puts old_vxi on change
/* vx_migrate_task() -- migrate task @p into context @vxi: migrates the
 * user accounting, moves thread/NPROC/file-limit charges from the old
 * context to the new one, and swaps p->vx_info (gets @vxi, puts the old
 * context via clr/set). */
430 int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
432 struct vx_info *old_vxi;
/* temporary reference on the task's current context */
438 old_vxi = task_get_vx_info(p);
442 vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
443 vxi->vx_id, atomic_read(&vxi->vx_usecnt));
/* migrate user accounting first; only proceed on success */
445 if (!(ret = vx_migrate_user(p, vxi))) {
449 openfd = vx_openfd_task(p);
450 nofiles = vx_nofiles_task(p);
/* uncharge the old context ... */
453 atomic_dec(&old_vxi->cacct.nr_threads);
454 atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]);
/* NOTE(review): these two subtract from the NEW context (vxi) rather
 * than old_vxi, which looks inconsistent with the dec/add pattern
 * around them -- verify against upstream Linux-VServer before changing */
455 atomic_sub(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
456 atomic_sub(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
/* ... and charge the new one */
458 atomic_inc(&vxi->cacct.nr_threads);
459 atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]);
460 atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
461 atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
462 /* should be handled in set_vx_info !! */
/* clr drops the task's old context ref; set installs and gets @vxi */
464 clr_vx_info(&p->vx_info);
465 set_vx_info(&p->vx_info, vxi);
470 /* obsoleted by clr/set */
471 // put_vx_info(old_vxi);
/* drop the temporary reference taken by task_get_vx_info() above */
474 put_vx_info(old_vxi);
/* vx_set_init() -- record @p as the init process (initpid) of @vxi */
478 int vx_set_init(struct vx_info *vxi, struct task_struct *p)
485 vxi->vx_initpid = p->tgid;
490 /* vserver syscall commands below here */
492 /* task xid and vx_info functions */
494 #include <asm/uaccess.h>
/* vc_task_xid() -- vserver syscall: return the xid of the task with
 * pid @id; restricted to admin/watch contexts */
497 int vc_task_xid(uint32_t id, void __user *data)
502 struct task_struct *tsk;
504 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* tasklist lock keeps the task from disappearing during the lookup */
507 read_lock(&tasklist_lock);
508 tsk = find_task_by_pid(id);
509 xid = (tsk) ? tsk->xid : -ESRCH;
510 read_unlock(&tasklist_lock);
/* vc_vx_info() -- vserver syscall: copy basic info (xid, initpid) of
 * context @id to userspace @data */
518 int vc_vx_info(uint32_t id, void __user *data)
521 struct vcmd_vx_info_v0 vc_data;
523 if (!vx_check(0, VX_ADMIN))
525 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
/* takes a reference; the put and NULL check are in elided lines */
528 vxi = locate_vx_info(id);
532 vc_data.xid = vxi->vx_id;
533 vc_data.initpid = vxi->vx_initpid;
536 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
542 /* context functions */
/* vc_ctx_create() -- vserver syscall: create (or locate) context @xid
 * and migrate the current task into it; returns the resulting xid */
544 int vc_ctx_create(uint32_t xid, void __user *data)
546 struct vx_info *new_vxi;
549 if (!capable(CAP_SYS_ADMIN))
/* only static xids or the explicit dynamic marker may be created */
552 if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
558 new_vxi = __loc_vx_info(xid, &ret);
/* an existing, fully set-up context cannot be "created" again */
561 if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
566 ret = new_vxi->vx_id;
567 vx_migrate_task(current, new_vxi);
568 /* if this fails, we might end up with a hashed vx_info */
570 put_vx_info(new_vxi);
/* vc_ctx_migrate() -- vserver syscall: migrate the current task into
 * the existing context @id */
575 int vc_ctx_migrate(uint32_t id, void __user *data)
579 if (!capable(CAP_SYS_ADMIN))
582 /* dirty hack until Spectator becomes a cap */
588 vxi = locate_vx_info(id);
591 vx_migrate_task(current, vxi);
/* vc_get_cflags() -- vserver syscall: copy the flag word of context @id
 * to userspace, plus a mask of the flags still settable */
597 int vc_get_cflags(uint32_t id, void __user *data)
600 struct vcmd_ctx_flags_v0 vc_data;
602 if (!capable(CAP_SYS_ADMIN))
605 vxi = locate_vx_info(id);
609 vc_data.flagword = vxi->vx_flags;
611 /* special STATE flag handling */
/* one-time flags already cleared are masked out of the settable set */
612 vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
616 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_cflags() -- vserver syscall: update the flags of context @id
 * under the caller-supplied mask; transitions of one-time STATE flags
 * trigger their side effects (bcap masking, init registration) */
621 int vc_set_cflags(uint32_t id, void __user *data)
624 struct vcmd_ctx_flags_v0 vc_data;
625 uint64_t mask, trigger;
627 if (!capable(CAP_SYS_ADMIN))
629 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
632 vxi = locate_vx_info(id);
636 /* special STATE flag handling */
/* restrict the mask to one-time flags not yet consumed */
637 mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
/* trigger = maskable flags whose value actually changes */
638 trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
/* leaving SETUP: apply the bounding caps to the current task */
640 if (trigger & VXF_STATE_SETUP)
641 vx_mask_bcaps(current);
/* leaving INIT: record current as the context's init process */
642 if (trigger & VXF_STATE_INIT)
643 if (vxi == current->vx_info)
644 vx_set_init(vxi, current);
646 vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
647 vc_data.flagword, mask);
/* vc_get_ccaps() -- vserver syscall: copy the bounding and context
 * capabilities of context @id to userspace */
652 int vc_get_ccaps(uint32_t id, void __user *data)
655 struct vcmd_ctx_caps_v0 vc_data;
657 if (!capable(CAP_SYS_ADMIN))
660 vxi = locate_vx_info(id);
664 vc_data.bcaps = vxi->vx_bcaps;
665 vc_data.ccaps = vxi->vx_ccaps;
/* all context-cap bits are reported as maskable */
666 vc_data.cmask = ~0UL;
669 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_ccaps() -- vserver syscall: restrict the bounding caps of
 * context @id (they can only ever shrink) and update its context caps
 * under the supplied mask */
674 int vc_set_ccaps(uint32_t id, void __user *data)
677 struct vcmd_ctx_caps_v0 vc_data;
679 if (!capable(CAP_SYS_ADMIN))
681 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
684 vxi = locate_vx_info(id);
/* AND ensures bcaps are monotonically reduced, never raised */
688 vxi->vx_bcaps &= vc_data.bcaps;
689 vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
690 vc_data.ccaps, vc_data.cmask);
695 #include <linux/module.h>
697 EXPORT_SYMBOL_GPL(rcu_free_vx_info);
698 EXPORT_SYMBOL_GPL(vx_info_hash_lock);
699 EXPORT_SYMBOL_GPL(unhash_vx_info);