2 * linux/kernel/vserver/context.c
4 * Virtual Server: Context Support
6 * Copyright (C) 2003-2004 Herbert Pötzl
9 * V0.02 vx_ctx_kill syscall command
10 * V0.03 replaced context_info calls
11 * V0.04 redesign of struct (de)alloc
12 * V0.05 rlimit basic implementation
13 * V0.06 task_xid and info commands
14 * V0.07 context flags and caps
15 * V0.08 switch to RCU based hash
19 #include <linux/config.h>
20 #include <linux/slab.h>
21 #include <linux/vserver.h>
22 #include <linux/vserver/legacy.h>
23 #include <linux/vs_base.h>
24 #include <linux/vs_context.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/namespace.h>
27 #include <linux/rcupdate.h>
29 #include <asm/errno.h>
34 * allocate an initialized vx_info struct
35 * doesn't make it visible (hash) */
37 static struct vx_info *__alloc_vx_info(xid_t xid)
/* Allocate and zero-initialize a vx_info for `xid`; the result is NOT
 * yet visible in the global hash (caller hashes it later).
 * NOTE(review): this extract is missing lines (opening brace, the
 * NULL check after kmalloc, the `return new;` and closing brace) --
 * verify against the pristine file before editing. */
39 struct vx_info *new = NULL;
41 vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
43 /* would this benefit from a slab cache? */
44 new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
/* NOTE(review): memset appears to run unconditionally here; the
 * kmalloc failure check is not visible in this extract. */
48 memset (new, 0, sizeof(struct vx_info));
/* Both counters start at zero; RCU head and hash node start detached. */
50 INIT_RCU_HEAD(&new->vx_rcu);
51 INIT_HLIST_NODE(&new->vx_hlist);
52 atomic_set(&new->vx_refcnt, 0);
53 atomic_set(&new->vx_usecnt, 0);
55 /* rest of init goes here */
56 vx_info_init_limit(&new->limit);
57 vx_info_init_sched(&new->sched);
58 vx_info_init_cvirt(&new->cvirt);
59 vx_info_init_cacct(&new->cacct);
/* New contexts start in SETUP|INIT state with the default init
 * capability set; SETUP is cleared later via vc_set_cflags(). */
61 new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
62 new->vx_bcaps = CAP_INIT_EFF_SET;
65 vxdprintk(VXD_CBIT(xid, 0),
66 "alloc_vx_info(%d) = %p", xid, new);
70 /* __dealloc_vx_info()
72 * final disposal of vx_info */
74 static void __dealloc_vx_info(struct vx_info *vxi)
/* Final teardown of a vx_info: poison the hash link, drop the
 * namespace/fs references, tear down per-subsystem state, and assert
 * that nobody still holds it.
 * NOTE(review): the final kfree() and closing brace are not visible
 * in this extract. */
76 vxdprintk(VXD_CBIT(xid, 0),
77 "dealloc_vx_info(%p)", vxi);
/* Poison the list link so a stale lookup through this node oopses
 * loudly instead of silently walking freed memory. */
79 vxi->vx_hlist.next = LIST_POISON1;
82 if (vxi->vx_namespace)
83 put_namespace(vxi->vx_namespace);
/* NOTE(review): vx_fs is released without a NULL guard while
 * vx_namespace above is guarded -- confirm vx_fs is always set, or
 * that put_fs_struct tolerates NULL, in the pristine file. */
85 put_fs_struct(vxi->vx_fs);
87 vx_info_exit_limit(&vxi->limit);
88 vx_info_exit_sched(&vxi->sched);
89 vx_info_exit_cvirt(&vxi->cvirt);
90 vx_info_exit_cacct(&vxi->cacct);
/* Both counters must have reached zero before disposal. */
92 BUG_ON(atomic_read(&vxi->vx_usecnt));
93 BUG_ON(atomic_read(&vxi->vx_refcnt));
99 /* hash table for vx_info hash */
/* 13 buckets (prime, to spread sequential xids across buckets). */
101 #define VX_HASH_SIZE 13
103 struct hlist_head vx_info_hash[VX_HASH_SIZE];
/* Writers (hash/unhash) take this spinlock; readers use RCU
 * (see __lookup_vx_info), so lookups never take the lock. */
105 static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* Map an xid to its bucket index in vx_info_hash. */
108 static inline unsigned int __hashval(xid_t xid)
110 return (xid % VX_HASH_SIZE);
117 * add the vxi to the global hash table
118 * requires the hash_lock to be held */
120 static inline void __hash_vx_info(struct vx_info *vxi)
/* Insert vxi at the head of its hash bucket, RCU-safe for concurrent
 * lockless readers. Caller must hold vx_info_hash_lock. */
122 struct hlist_head *head;
124 vxdprintk(VXD_CBIT(xid, 4),
125 "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
127 head = &vx_info_hash[__hashval(vxi->vx_id)];
128 hlist_add_head_rcu(&vxi->vx_hlist, head);
131 /* __unhash_vx_info()
133 * remove the vxi from the global hash table
134 * requires the hash_lock to be held */
136 static inline void __unhash_vx_info(struct vx_info *vxi)
/* Remove vxi from the hash, RCU-safe; concurrent readers may still
 * see it until a grace period passes (hence rcu_free_vx_info).
 * Caller must hold vx_info_hash_lock. */
138 vxdprintk(VXD_CBIT(xid, 4),
139 "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
140 hlist_del_rcu(&vxi->vx_hlist);
145 /* __lookup_vx_info()
147 * requires the rcu_read_lock()
148 * doesn't increment the vx_refcnt */
150 static inline struct vx_info *__lookup_vx_info(xid_t xid)
/* Walk the bucket for `xid` under rcu_read_lock() and return the
 * matching vx_info, without taking a reference (caller's job).
 * NOTE(review): the loop body/return and the not-found return are
 * missing from this extract -- the function is truncated here. */
152 struct hlist_head *head = &vx_info_hash[__hashval(xid)];
153 struct hlist_node *pos;
155 hlist_for_each_rcu(pos, head) {
156 struct vx_info *vxi =
157 hlist_entry(pos, struct vx_info, vx_hlist);
159 if (vxi->vx_id == xid) {
169 * find unused dynamic xid
170 * requires the hash_lock to be held */
172 static inline xid_t __vx_dynamic_id(void)
/* Scan for an unused dynamic xid, starting just past the last one
 * handed out (static `seq` persists across calls). Caller must hold
 * vx_info_hash_lock, and lookups here rely on RCU list walking.
 * NOTE(review): the declaration/assignment of `barrier`, the wrap
 * back to MIN_D_CONTEXT, and the success/failure returns are missing
 * from this extract. */
174 static xid_t seq = MAX_S_CONTEXT;
178 if (++seq > MAX_S_CONTEXT)
180 if (!__lookup_vx_info(seq)) {
181 vxdprintk(VXD_CBIT(xid, 4),
182 "__vx_dynamic_id: [#%d]", seq);
/* Loop terminates once we have scanned a full cycle back to the
 * starting point (`barrier`). */
185 } while (barrier != seq);
191 * locate or create the requested context
192 * get() it and if new hash it */
194 static struct vx_info * __loc_vx_info(int id, int *err)
/* Locate an existing context for `id` (or allocate a dynamic id for
 * VX_DYNAMIC_ID, or create a fresh context), returning it get()ed;
 * a newly created context is also hashed. *err receives the error
 * code on failure. A vx_info is speculatively allocated before the
 * lock is taken, and discarded at the end if it was not used.
 * NOTE(review): heavily truncated extract -- the error assignments,
 * goto/unlock/return interleaving, and several closing braces are
 * missing; do not edit logic without the pristine file. */
196 struct vx_info *new, *vxi = NULL;
198 vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
/* Allocate before taking the spinlock (kmalloc may sleep). */
200 if (!(new = __alloc_vx_info(id))) {
205 spin_lock(&vx_info_hash_lock);
207 /* dynamic context requested */
208 if (id == VX_DYNAMIC_ID) {
209 id = __vx_dynamic_id();
211 printk(KERN_ERR "no dynamic context available.\n");
216 /* existing context requested */
217 else if ((vxi = __lookup_vx_info(id))) {
218 /* context in setup is not available */
219 if (vxi->vx_flags & VXF_STATE_SETUP) {
220 vxdprintk(VXD_CBIT(xid, 0),
221 "loc_vx_info(%d) = %p (not available)", id, vxi);
225 vxdprintk(VXD_CBIT(xid, 0),
226 "loc_vx_info(%d) = %p (found)", id, vxi);
233 /* new context requested */
234 vxdprintk(VXD_CBIT(xid, 0),
235 "loc_vx_info(%d) = %p (new)", id, new);
/* The speculative allocation becomes the live context: hash it with
 * a reference held, then forget `new` so it is not freed below. */
236 __hash_vx_info(get_vx_info(new));
237 vxi = new, new = NULL;
241 spin_unlock(&vx_info_hash_lock);
/* Dispose of the speculative allocation if it went unused. */
243 __dealloc_vx_info(new);
253 void rcu_free_vx_info(struct rcu_head *head)
/* RCU callback: dispose of a vx_info once a grace period has passed
 * after unhashing, so no lockless reader can still reach it.
 * NOTE(review): the declarations of `usecnt`/`refcnt`, the
 * conditional around __dealloc_vx_info, and the closing brace are
 * missing from this extract. */
255 struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
/* NOTE(review): `head` was already used by container_of above, and
 * container_of of a non-NULL pointer cannot yield NULL -- this
 * BUG_ON looks ineffective as written; confirm intent. */
258 BUG_ON(!vxi || !head);
260 usecnt = atomic_read(&vxi->vx_usecnt);
263 refcnt = atomic_read(&vxi->vx_refcnt);
266 vxdprintk(VXD_CBIT(xid, 3),
267 "rcu_free_vx_info(%p): uc=%d", vxi, usecnt);
269 __dealloc_vx_info(vxi);
/* Reached when the counters indicate the info is still in use. */
271 printk("!!! rcu didn't free\n");
274 void unhash_vx_info(struct vx_info *vxi)
/* Public wrapper: remove vxi from the global hash under the hash
 * lock. Actual freeing is deferred to RCU (rcu_free_vx_info). */
276 spin_lock(&vx_info_hash_lock);
277 __unhash_vx_info(vxi);
278 spin_unlock(&vx_info_hash_lock);
283 * search for a vx_info and get() it
284 * negative id means current */
286 struct vx_info *locate_vx_info(int id)
/* Look up a context by id and return it get()ed; a negative id means
 * the current task's own context (per the comment above).
 * NOTE(review): the `vxi` declaration, the id<0 branch structure,
 * the rcu_read_lock/unlock pair, and the return are missing from
 * this extract. */
291 vxi = get_vx_info(current->vx_info);
294 vxi = get_vx_info(__lookup_vx_info(id));
300 /* vx_info_is_hashed()
302 * verify that xid is still hashed */
304 int vx_info_is_hashed(xid_t xid)
/* Return nonzero iff `xid` is currently present in the hash.
 * NOTE(review): the rcu_read_lock/unlock around the lookup and the
 * return statement are missing from this extract. */
309 hashed = (__lookup_vx_info(xid) != NULL);
314 #ifdef CONFIG_VSERVER_LEGACY
/* Legacy API (CONFIG_VSERVER_LEGACY): expose the raw allocator. */
317 struct vx_info *alloc_vx_info(xid_t xid)
319 return __alloc_vx_info(xid);
323 struct vx_info *locate_or_create_vx_info(int id)
/* Legacy API: find-or-create via __loc_vx_info.
 * NOTE(review): the `err` declaration is missing from this extract,
 * and the error value is discarded -- callers only see NULL. */
327 return __loc_vx_info(id, &err);
332 #ifdef CONFIG_PROC_FS
/* Local (CONFIG_PROC_FS) definition of an RCU-safe hlist walk with
 * next-node prefetch and a read dependency barrier between steps.
 * NOTE(review): if the kernel headers already define
 * hlist_for_each_rcu, this redefinition would warn/clash -- confirm
 * against the include set in the pristine file. */
334 #define hlist_for_each_rcu(pos, head) \
335 for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
336 pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
338 int get_xid_list(int index, unsigned int *xids, int size)
/* Fill `xids` with up to `size` hashed context ids (used by /proc),
 * walking every bucket under RCU.
 * NOTE(review): the rcu_read_lock/unlock, the `index` skip logic,
 * the `vxi` declaration, loop exits and the return of nr_xids are
 * missing from this extract. */
340 int hindex, nr_xids = 0;
343 for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
344 struct hlist_head *head = &vx_info_hash[hindex];
345 struct hlist_node *pos;
347 hlist_for_each_rcu(pos, head) {
353 vxi = hlist_entry(pos, struct vx_info, vx_hlist);
354 xids[nr_xids] = vxi->vx_id;
355 if (++nr_xids >= size)
365 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
/* Re-account task `p` under a per-context user_struct keyed by
 * (vxi->vx_id, p->uid), moving the process count from the old
 * user_struct to the new one.
 * NOTE(review): the alloc_uid NULL check, the switch_uid/assignment
 * of p->user, the free of the unused user, and the return are
 * missing from this extract. */
367 struct user_struct *new_user, *old_user;
371 new_user = alloc_uid(vxi->vx_id, p->uid);
376 if (new_user != old_user) {
377 atomic_inc(&new_user->processes);
378 atomic_dec(&old_user->processes);
385 void vx_mask_bcaps(struct task_struct *p)
/* Clamp all three capability sets of `p` to its context's bcaps,
 * so a task cannot hold capabilities its context forbids. */
387 struct vx_info *vxi = p->vx_info;
389 p->cap_effective &= vxi->vx_bcaps;
390 p->cap_inheritable &= vxi->vx_bcaps;
391 p->cap_permitted &= vxi->vx_bcaps;
395 #include <linux/file.h>
397 static inline int vx_nofiles_task(struct task_struct *tsk)
/* Count the open file descriptors of `tsk` by popcounting the
 * open_fds bitmap under file_lock (used for RLIMIT_NOFILE
 * accounting in vx_migrate_task).
 * NOTE(review): the declarations of `count`/`total`, the `obptr++`
 * advance inside the loop, and the return are missing from this
 * extract -- as shown, the loop would re-count the first word. */
399 struct files_struct *files = tsk->files;
400 const unsigned long *obptr;
403 spin_lock(&files->file_lock);
404 obptr = files->open_fds->fds_bits;
/* Number of bitmap words: max_fds divided by bits-per-long. */
405 count = files->max_fds / (sizeof(unsigned long) * 8);
406 for (total = 0; count > 0; count--) {
408 total += hweight_long(*obptr);
411 spin_unlock(&files->file_lock);
417 static inline int vx_openfd_task(struct task_struct *tsk)
/* Same bitmap popcount as vx_nofiles_task (kept separate; its caller
 * in vx_migrate_task is currently commented out).
 * NOTE(review): declarations of `count`/`total`, the `bptr++`
 * advance, and the return are missing from this extract. */
419 struct files_struct *files = tsk->files;
420 const unsigned long *bptr;
423 spin_lock(&files->file_lock);
424 bptr = files->open_fds->fds_bits;
425 count = files->max_fds / (sizeof(unsigned long) * 8);
426 for (total = 0; count > 0; count--) {
428 total += hweight_long(*bptr);
431 spin_unlock(&files->file_lock);
438 * migrate task to new context
439 * gets vxi, puts old_vxi on change
442 int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
/* Move task `p` into context `vxi`: re-account its user, transfer
 * the thread/NPROC/NOFILE counters from the old context to the new
 * one, and swap p->vx_info (gets vxi, puts the old vxi -- see the
 * header comment above).
 * NOTE(review): many lines are missing from this extract (argument
 * checks, the `ret`/`nofiles` declarations, task_lock/unlock,
 * several braces and the final return) -- treat the visible ordering
 * as indicative only. */
444 struct vx_info *old_vxi;
450 old_vxi = task_get_vx_info(p);
454 vxdprintk(VXD_CBIT(xid, 5),
455 "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
456 vxi->vx_id, atomic_read(&vxi->vx_usecnt));
/* Only proceed with counter transfer if user re-accounting worked. */
458 if (!(ret = vx_migrate_user(p, vxi))) {
/* OPENFD accounting is disabled (see commented lines below). */
462 // openfd = vx_openfd_task(p);
463 nofiles = vx_nofiles_task(p);
/* Drain the old context's counters... */
466 atomic_dec(&old_vxi->cacct.nr_threads);
467 atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]);
468 atomic_sub(nofiles, &old_vxi->limit.rcur[RLIMIT_NOFILE]);
469 // atomic_sub(openfd, &old_vxi->limit.rcur[RLIMIT_OPENFD]);
/* ...and charge them to the new one. */
471 atomic_inc(&vxi->cacct.nr_threads);
472 atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]);
473 atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
474 // atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
476 vxdprintk(VXD_CBIT(xid, 5),
477 "moved task %p into vxi:%p[#%d]",
480 /* should be handled in set_vx_info !! */
482 clr_vx_info(&p->vx_info);
483 set_vx_info(&p->vx_info, vxi);
488 /* obsoleted by clr/set */
489 // put_vx_info(old_vxi);
/* Drop the reference taken by task_get_vx_info above. */
492 put_vx_info(old_vxi);
496 int vx_set_init(struct vx_info *vxi, struct task_struct *p)
/* Record `p` as the context's init process (by tgid).
 * NOTE(review): argument validation and the return are missing from
 * this extract. */
503 vxi->vx_initpid = p->tgid;
508 /* vserver syscall commands below here */
510 /* taks xid and vx_info functions */
512 #include <asm/uaccess.h>
515 int vc_task_xid(uint32_t id, void __user *data)
/* Syscall command: return the xid of the task with pid `id`
 * (permitted only for the admin/watch context).
 * NOTE(review): the `xid` declaration, the id==0 (current task)
 * case, the permission-failure return and the final return are
 * missing from this extract. */
520 struct task_struct *tsk;
522 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* tasklist_lock guards the pid lookup against task exit. */
525 read_lock(&tasklist_lock);
526 tsk = find_task_by_pid(id);
527 xid = (tsk) ? tsk->xid : -ESRCH;
528 read_unlock(&tasklist_lock);
536 int vc_vx_info(uint32_t id, void __user *data)
/* Syscall command: copy xid and initpid of context `id` to
 * user space (admin context + CAP_SYS_ADMIN/CAP_SYS_RESOURCE only).
 * NOTE(review): the `vxi` declaration, the permission/lookup failure
 * returns, put_vx_info after use, and the final return are missing
 * from this extract. */
539 struct vcmd_vx_info_v0 vc_data;
541 if (!vx_check(0, VX_ADMIN))
543 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
546 vxi = locate_vx_info(id);
550 vc_data.xid = vxi->vx_id;
551 vc_data.initpid = vxi->vx_initpid;
554 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
560 /* context functions */
562 int vc_ctx_create(uint32_t xid, void __user *data)
/* Syscall command: create (or locate) context `xid` and migrate the
 * calling task into it; returns the new context's id. Rejects ids in
 * the dynamic range unless VX_DYNAMIC_ID is explicitly requested.
 * NOTE(review): the `ret` declaration, several error returns, the
 * already-set-up error branch body, and the final return are missing
 * from this extract. */
564 struct vx_info *new_vxi;
567 if (!capable(CAP_SYS_ADMIN))
570 if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
576 new_vxi = __loc_vx_info(xid, &ret);
/* A context not in SETUP state already exists -- creation fails. */
579 if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
584 ret = new_vxi->vx_id;
585 vx_migrate_task(current, new_vxi);
586 /* if this fails, we might end up with a hashed vx_info */
/* Drop the reference returned by __loc_vx_info. */
588 put_vx_info(new_vxi);
593 int vc_ctx_migrate(uint32_t id, void __user *data)
/* Syscall command: migrate the calling task into context `id`.
 * NOTE(review): the `vxi` declaration, the Spectator special case
 * (see the comment below), the lookup-failure return, put_vx_info,
 * and the final return are missing from this extract. */
597 if (!capable(CAP_SYS_ADMIN))
600 /* dirty hack until Spectator becomes a cap */
606 vxi = locate_vx_info(id);
609 vx_migrate_task(current, vxi);
615 int vc_get_cflags(uint32_t id, void __user *data)
/* Syscall command: copy context `id`'s flag word (and the mask of
 * flags that may still be changed) to user space.
 * NOTE(review): the `vxi` declaration, error returns, put_vx_info
 * and the final return are missing from this extract. */
618 struct vcmd_ctx_flags_v0 vc_data;
620 if (!capable(CAP_SYS_ADMIN))
623 vxi = locate_vx_info(id);
627 vc_data.flagword = vxi->vx_flags;
629 /* special STATE flag handling */
/* ONE_TIME flags that already fired are masked out of the settable
 * set reported to user space. */
630 vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
634 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
639 int vc_set_cflags(uint32_t id, void __user *data)
/* Syscall command: update context `id`'s flag word from user space,
 * triggering the one-time state transitions (SETUP -> mask bcaps,
 * INIT -> record init pid) when those bits flip.
 * NOTE(review): the `vxi` declaration, copy/lookup failure returns,
 * put_vx_info and the final return are missing from this extract. */
642 struct vcmd_ctx_flags_v0 vc_data;
643 uint64_t mask, trigger;
645 if (!capable(CAP_SYS_ADMIN))
647 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
650 vxi = locate_vx_info(id);
654 /* special STATE flag handling */
/* Restrict the user-supplied mask to flags still allowed to change,
 * then compute which masked bits actually flip. */
655 mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
656 trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
/* Leaving SETUP: clamp the caller's capabilities to the context. */
658 if (trigger & VXF_STATE_SETUP)
659 vx_mask_bcaps(current);
/* Leaving INIT (only for the task's own context): record init pid. */
660 if (trigger & VXF_STATE_INIT)
661 if (vxi == current->vx_info)
662 vx_set_init(vxi, current);
664 vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
665 vc_data.flagword, mask);
670 int vc_get_ccaps(uint32_t id, void __user *data)
/* Syscall command: copy context `id`'s bcaps/ccaps to user space.
 * NOTE(review): the `vxi` declaration, error returns, put_vx_info
 * and the final return are missing from this extract. */
673 struct vcmd_ctx_caps_v0 vc_data;
675 if (!capable(CAP_SYS_ADMIN))
678 vxi = locate_vx_info(id);
682 vc_data.bcaps = vxi->vx_bcaps;
683 vc_data.ccaps = vxi->vx_ccaps;
/* NOTE(review): ~0UL is only 32 bits of ones on 32-bit kernels; if
 * cmask is a 64-bit field this under-reports -- confirm the
 * vcmd_ctx_caps_v0 definition. */
684 vc_data.cmask = ~0UL;
687 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
692 int vc_set_ccaps(uint32_t id, void __user *data)
/* Syscall command: update context `id`'s capabilities from user
 * space. bcaps can only ever shrink (&=); ccaps is merged under the
 * user-supplied cmask.
 * NOTE(review): the `vxi` declaration, copy/lookup failure returns,
 * put_vx_info and the final return are missing from this extract. */
695 struct vcmd_ctx_caps_v0 vc_data;
697 if (!capable(CAP_SYS_ADMIN))
699 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
702 vxi = locate_vx_info(id);
706 vxi->vx_bcaps &= vc_data.bcaps;
707 vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
708 vc_data.ccaps, vc_data.cmask);
713 #include <linux/module.h>
/* Exported (GPL-only) for the vserver module: the RCU disposal
 * callback, the hash writer lock, and the unhash entry point. */
715 EXPORT_SYMBOL_GPL(rcu_free_vx_info);
716 EXPORT_SYMBOL_GPL(vx_info_hash_lock);
717 EXPORT_SYMBOL_GPL(unhash_vx_info);