2 * linux/kernel/vserver/context.c
4 * Virtual Server: Context Support
6 * Copyright (C) 2003-2004 Herbert Pötzl
9 * V0.02 vx_ctx_kill syscall command
10 * V0.03 replaced context_info calls
11 * V0.04 redesign of struct (de)alloc
12 * V0.05 rlimit basic implementation
13 * V0.06 task_xid and info commands
14 * V0.07 context flags and caps
15 * V0.08 switch to RCU based hash
19 #include <linux/config.h>
20 #include <linux/slab.h>
21 #include <linux/vserver.h>
22 #include <linux/vserver/legacy.h>
23 #include <linux/vs_base.h>
24 #include <linux/vs_context.h>
25 #include <linux/kernel_stat.h>
26 #include <linux/namespace.h>
27 #include <linux/rcupdate.h>
29 #include <asm/errno.h>
34 * allocate an initialized vx_info struct
35 * doesn't make it visible (hash) */
/* NOTE(review): this view of the file elides lines (e.g. the kmalloc
 * failure check and the closing return) — confirm against full source. */
37 static struct vx_info *__alloc_vx_info(xid_t xid)
39 struct vx_info *new = NULL;
41 vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
43 /* would this benefit from a slab cache? */
44 new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
/* zero the whole struct before wiring up the individual members */
48 memset (new, 0, sizeof(struct vx_info));
50 INIT_RCU_HEAD(&new->vx_rcu);
51 INIT_HLIST_NODE(&new->vx_hlist);
/* both counters start at zero; callers get()/hash the struct later */
52 atomic_set(&new->vx_refcnt, 0);
53 atomic_set(&new->vx_usecnt, 0);
54 new->vx_parent = NULL;
56 new->vx_lock = SPIN_LOCK_UNLOCKED;
57 init_waitqueue_head(&new->vx_exit);
59 /* rest of init goes here */
/* initialize the per-context accounting/limit/scheduler subsystems */
60 vx_info_init_limit(&new->limit);
61 vx_info_init_sched(&new->sched);
62 vx_info_init_cvirt(&new->cvirt);
63 vx_info_init_cacct(&new->cacct);
/* new contexts start in SETUP|INIT state until configured */
66 new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
67 new->vx_bcaps = CAP_INIT_EFF_SET;
70 vxdprintk(VXD_CBIT(xid, 0),
71 "alloc_vx_info(%d) = %p", xid, new);
75 /* __dealloc_vx_info()
77 * final disposal of vx_info */
/* NOTE(review): the kfree()/closing lines are elided in this view. */
79 static void __dealloc_vx_info(struct vx_info *vxi)
81 vxdprintk(VXD_CBIT(xid, 0),
82 "dealloc_vx_info(%p)", vxi);
/* poison the hash-list pointer so any stale traversal faults loudly */
84 vxi->vx_hlist.next = LIST_POISON1;
/* tear down the per-context subsystems (mirror of __alloc_vx_info) */
87 vx_info_exit_limit(&vxi->limit);
88 vx_info_exit_sched(&vxi->sched);
89 vx_info_exit_cvirt(&vxi->cvirt);
90 vx_info_exit_cacct(&vxi->cacct);
/* sanity: nobody may still hold or use this context, and it must
 * already have been removed from the global hash */
93 BUG_ON(atomic_read(&vxi->vx_usecnt));
94 BUG_ON(atomic_read(&vxi->vx_refcnt));
96 BUG_ON(vx_info_state(vxi, VXS_HASHED));
97 // BUG_ON(!vx_state(vxi, VXS_DEFUNCT));
/* mark released for post-mortem debugging of use-after-free */
99 vxi->vx_state |= VXS_RELEASED;
/* __free_vx_info()
 * final-release path: samples both counters, then deallocates.
 * NOTE(review): the conditional logic acting on usecnt/refcnt and the
 * return statement are elided in this view — confirm upstream. */
103 static inline int __free_vx_info(struct vx_info *vxi)
109 usecnt = atomic_read(&vxi->vx_usecnt);
112 refcnt = atomic_read(&vxi->vx_refcnt);
116 __dealloc_vx_info(vxi);
/* __rcu_free_vx_info()
 * RCU callback: recovers the vx_info from its embedded rcu_head.
 * NOTE(review): the actual free after the debug print is elided here. */
122 static void __rcu_free_vx_info(struct rcu_head *head)
124 struct vx_info *vxi = container_of(head, struct vx_info, vx_rcu);
127 vxdprintk(VXD_CBIT(xid, 3),
128 "rcu_free_vx_info(%p): uc=%d", vxi,
129 atomic_read(&vxi->vx_usecnt));
/* free_vx_info()
 * drop the context's namespace/fs references and dispose of it;
 * __free_vx_info() must succeed (zero) or this BUGs. */
136 void free_vx_info(struct vx_info *vxi)
138 struct namespace *namespace;
139 struct fs_struct *fs;
141 /* context shutdown is mandatory */
142 // BUG_ON(vxi->vx_state != VXS_SHUTDOWN);
/* atomically detach the namespace and fs so nobody can reuse them */
144 namespace = xchg(&vxi->vx_namespace, NULL);
145 fs = xchg(&vxi->vx_fs, NULL);
148 put_namespace(namespace);
/* NOTE(review): the matching put for 'fs' is elided in this view —
 * presumably put_fs_struct(fs); confirm against full source. */
152 BUG_ON(__free_vx_info(vxi));
153 // call_rcu(&i->vx_rcu, __rcu_free_vx_info);
157 /* hash table for vx_info hash */
159 #define VX_HASH_SIZE 13
161 struct hlist_head vx_info_hash[VX_HASH_SIZE];
/* protects writers to vx_info_hash; readers use RCU */
163 static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
/* map an xid to its bucket index (simple modulo over a prime size) */
166 static inline unsigned int __hashval(xid_t xid)
168 return (xid % VX_HASH_SIZE);
175 * add the vxi to the global hash table
176 * requires the hash_lock to be held */
178 static inline void __hash_vx_info(struct vx_info *vxi)
180 struct hlist_head *head;
182 vxdprintk(VXD_CBIT(xid, 4),
183 "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
/* mark HASHED before insertion so RCU readers that find the entry
 * also see the state bit */
185 vxi->vx_state |= VXS_HASHED;
186 head = &vx_info_hash[__hashval(vxi->vx_id)];
187 hlist_add_head_rcu(&vxi->vx_hlist, head);
190 /* __unhash_vx_info()
192 * remove the vxi from the global hash table
193 * requires the hash_lock to be held */
195 static inline void __unhash_vx_info(struct vx_info *vxi)
197 vxdprintk(VXD_CBIT(xid, 4),
198 "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
/* clear HASHED first so concurrent RCU lookups reject this entry
 * (see the VXS_HASHED test in __lookup_vx_info) */
199 vxi->vx_state &= ~VXS_HASHED;
200 hlist_del_rcu(&vxi->vx_hlist);
205 /* __lookup_vx_info()
207 * requires the rcu_read_lock()
208 * doesn't increment the vx_refcnt */
/* NOTE(review): the 'return vxi' / 'return NULL' lines are elided in
 * this view — confirm against full source. */
210 static inline struct vx_info *__lookup_vx_info(xid_t xid)
212 struct hlist_head *head = &vx_info_hash[__hashval(xid)];
213 struct hlist_node *pos;
/* RCU-safe bucket walk; writers are serialized by vx_info_hash_lock */
215 hlist_for_each_rcu(pos, head) {
216 struct vx_info *vxi =
217 hlist_entry(pos, struct vx_info, vx_hlist);
/* match only entries still marked hashed — an entry being unhashed
 * may still be reachable during the RCU grace period */
219 if ((vxi->vx_id == xid) &&
220 vx_info_state(vxi, VXS_HASHED))
229 * find unused dynamic xid
230 * requires the hash_lock to be held */
/* NOTE(review): the declaration/initialization of 'barrier' and the
 * wrap-around assignment after the range check are elided here;
 * presumably seq wraps back to the dynamic-range start — confirm. */
232 static inline xid_t __vx_dynamic_id(void)
/* persistent cursor: continues from where the previous call stopped */
234 static xid_t seq = MAX_S_CONTEXT;
238 if (++seq > MAX_S_CONTEXT)
/* first xid in the dynamic range that is not currently hashed wins */
240 if (!__lookup_vx_info(seq)) {
241 vxdprintk(VXD_CBIT(xid, 4),
242 "__vx_dynamic_id: [#%d]", seq);
/* give up once the cursor has walked the full range without a hit */
245 } while (barrier != seq);
251 * locate or create the requested context
252 * get() it and if new hash it */
/* Allocates a candidate vx_info up front (outside the lock), then under
 * vx_info_hash_lock either resolves a dynamic id, returns an existing
 * context, or hashes the new one. The unused candidate is freed at the
 * end. NOTE(review): several error-path and return lines are elided in
 * this view — confirm against full source. */
254 static struct vx_info * __loc_vx_info(int id, int *err)
256 struct vx_info *new, *vxi = NULL;
258 vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
/* allocate before taking the spinlock: kmalloc(GFP_KERNEL) may sleep */
260 if (!(new = __alloc_vx_info(id))) {
265 spin_lock(&vx_info_hash_lock);
267 /* dynamic context requested */
268 if (id == VX_DYNAMIC_ID) {
269 id = __vx_dynamic_id();
271 printk(KERN_ERR "no dynamic context available.\n");
276 /* existing context requested */
277 else if ((vxi = __lookup_vx_info(id))) {
278 /* context in setup is not available */
279 if (vxi->vx_flags & VXF_STATE_SETUP) {
280 vxdprintk(VXD_CBIT(xid, 0),
281 "loc_vx_info(%d) = %p (not available)", id, vxi);
285 vxdprintk(VXD_CBIT(xid, 0),
286 "loc_vx_info(%d) = %p (found)", id, vxi);
293 /* new context requested */
294 vxdprintk(VXD_CBIT(xid, 0),
295 "loc_vx_info(%d) = %p (new)", id, new);
/* hash holds a reference: get() before making it globally visible */
296 __hash_vx_info(get_vx_info(new));
297 vxi = new, new = NULL;
301 spin_unlock(&vx_info_hash_lock);
/* free the pre-allocated candidate if an existing context was used */
303 __dealloc_vx_info(new);
/* unhash_vx_info()
 * remove a context from the global hash under the hash lock */
312 void unhash_vx_info(struct vx_info *vxi)
314 spin_lock(&vx_info_hash_lock);
315 __unhash_vx_info(vxi);
316 spin_unlock(&vx_info_hash_lock);
321 * search for a vx_info and get() it
322 * negative id means current */
/* NOTE(review): the signature, the id<0 test, and the surrounding
 * rcu_read_lock()/unlock() lines are elided in this view. */
329 vxi = get_vx_info(current->vx_info);
332 vxi = get_vx_info(__lookup_vx_info(id));
338 /* vx_info_is_hashed()
340 * verify that xid is still hashed */
/* NOTE(review): the rcu_read_lock()/unlock() around the lookup and the
 * return are elided in this view. */
342 int vx_info_is_hashed(xid_t xid)
347 hashed = (__lookup_vx_info(xid) != NULL);
352 #ifdef CONFIG_VSERVER_LEGACY
/* legacy API: thin wrappers around the internal helpers, only built
 * for the old vserver userspace (CONFIG_VSERVER_LEGACY) */
355 struct vx_info *alloc_vx_info(xid_t xid)
357 return __alloc_vx_info(xid);
/* legacy locate-or-create; 'err' is discarded by this wrapper */
361 struct vx_info *locate_or_create_vx_info(int id)
365 return __loc_vx_info(id, &err);
370 #ifdef CONFIG_PROC_FS
/* get_xid_list()
 * fill 'xids' with up to 'size' context ids for /proc enumeration.
 * NOTE(review): use of 'index' (skip count) and the surrounding
 * rcu_read_lock()/unlock() plus return are elided in this view. */
372 int get_xid_list(int index, unsigned int *xids, int size)
374 int hindex, nr_xids = 0;
/* walk every hash bucket in order */
377 for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
378 struct hlist_head *head = &vx_info_hash[hindex];
379 struct hlist_node *pos;
381 hlist_for_each_rcu(pos, head) {
387 vxi = hlist_entry(pos, struct vx_info, vx_hlist);
388 xids[nr_xids] = vxi->vx_id;
/* stop as soon as the caller's buffer is full */
389 if (++nr_xids >= size)
/* vx_migrate_user()
 * switch task 'p' to the per-context user_struct for (xid, uid) so
 * per-user accounting is kept separately inside each context.
 * NOTE(review): assignment of old_user, the alloc_uid failure check,
 * the switch_uid/free_uid bookkeeping and return are elided here. */
399 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
401 struct user_struct *new_user, *old_user;
405 new_user = alloc_uid(vxi->vx_id, p->uid);
/* move the process count only when the user_struct actually changes */
410 if (new_user != old_user) {
411 atomic_inc(&new_user->processes);
412 atomic_dec(&old_user->processes);
/* vx_mask_bcaps()
 * restrict all three capability sets of 'p' to the context's
 * bounding capability set (vx_bcaps) */
419 void vx_mask_bcaps(struct task_struct *p)
421 struct vx_info *vxi = p->vx_info;
423 p->cap_effective &= vxi->vx_bcaps;
424 p->cap_inheritable &= vxi->vx_bcaps;
425 p->cap_permitted &= vxi->vx_bcaps;
429 #include <linux/file.h>
/* vx_nofiles_task()
 * count the open file descriptors of 'tsk' by popcounting the
 * open_fds bitmap under file_lock.
 * NOTE(review): the obptr increment inside the loop, the declarations
 * of count/total, and the return are elided in this view. */
431 static inline int vx_nofiles_task(struct task_struct *tsk)
433 struct files_struct *files = tsk->files;
434 unsigned long *obptr;
437 spin_lock(&files->file_lock);
438 obptr = files->open_fds->fds_bits;
/* number of unsigned-long words covering max_fds bits */
439 count = files->max_fds / (sizeof(unsigned long) * 8);
440 for (total = 0; count > 0; count--) {
442 total += hweight_long(*obptr);
445 spin_unlock(&files->file_lock);
/* vx_openfd_task()
 * same bitmap popcount as vx_nofiles_task(); kept separate,
 * presumably to count a different fd set in the full source —
 * NOTE(review): the bptr increment, declarations of count/total, and
 * the return are elided in this view; confirm the distinction. */
451 static inline int vx_openfd_task(struct task_struct *tsk)
453 struct files_struct *files = tsk->files;
454 const unsigned long *bptr;
457 spin_lock(&files->file_lock);
458 bptr = files->open_fds->fds_bits;
459 count = files->max_fds / (sizeof(unsigned long) * 8);
460 for (total = 0; count > 0; count--) {
462 total += hweight_long(*bptr);
465 spin_unlock(&files->file_lock);
472 * migrate task to new context
473 * gets vxi, puts old_vxi on change
/* Moves task 'p' from its current context into 'vxi': switches its
 * user accounting, transfers the thread/running/NPROC counters from
 * the old context to the new one, then swaps p->vx_info.
 * NOTE(review): parameter checks, locking around the counter moves,
 * and the final return are elided in this view — confirm upstream. */
476 int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
478 struct vx_info *old_vxi;
484 old_vxi = task_get_vx_info(p);
488 vxdprintk(VXD_CBIT(xid, 5),
489 "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
490 vxi->vx_id, atomic_read(&vxi->vx_usecnt));
/* user migration first; counters are only moved on its success */
492 if (!(ret = vx_migrate_user(p, vxi))) {
496 // openfd = vx_openfd_task(p);
497 nofiles = vx_nofiles_task(p);
/* debit the old context ... */
500 atomic_dec(&old_vxi->cvirt.nr_threads);
501 atomic_dec(&old_vxi->cvirt.nr_running);
502 atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]);
503 /* FIXME: what about the struct files here? */
504 // atomic_sub(nofiles, &old_vxi->limit.rcur[RLIMIT_NOFILE]);
505 // atomic_sub(openfd, &old_vxi->limit.rcur[RLIMIT_OPENFD]);
/* ... and credit the new one */
507 atomic_inc(&vxi->cvirt.nr_threads);
508 atomic_inc(&vxi->cvirt.nr_running);
509 atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]);
510 /* FIXME: what about the struct files here? */
511 // atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
512 // atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
514 vxdprintk(VXD_CBIT(xid, 5),
515 "moved task %p into vxi:%p[#%d]",
518 /* should be handled in set_vx_info !! */
520 clr_vx_info(&p->vx_info);
521 set_vx_info(&p->vx_info, vxi);
526 /* obsoleted by clr/set */
527 // put_vx_info(old_vxi);
/* drop the reference taken by task_get_vx_info() above */
530 put_vx_info(old_vxi);
/* vx_set_init()
 * record task 'p' as the init process (tgid) of context 'vxi'.
 * NOTE(review): validation and return lines are elided in this view. */
534 int vx_set_init(struct vx_info *vxi, struct task_struct *p)
541 vxdprintk(VXD_CBIT(xid, 6),
542 "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
543 vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
545 vxi->vx_initpid = p->tgid;
550 /* vserver syscall commands below here */
552 /* task xid and vx_info functions */
554 #include <asm/uaccess.h>
/* vc_task_xid()
 * syscall command: report the context id (xid) of task 'id'.
 * Only the host/admin context (or watch context) may query this.
 * NOTE(review): the 'id' validity handling, xid declaration and the
 * return-to-user path are elided in this view. */
557 int vc_task_xid(uint32_t id, void __user *data)
562 struct task_struct *tsk;
564 if (!vx_check(0, VX_ADMIN|VX_WATCH))
/* tasklist_lock keeps the task from going away during the lookup */
567 read_lock(&tasklist_lock);
568 tsk = find_task_by_real_pid(id);
569 xid = (tsk) ? tsk->xid : -ESRCH;
570 read_unlock(&tasklist_lock);
/* vc_vx_info()
 * syscall command: copy basic context info (xid, initpid) to user.
 * Requires the admin context plus both CAP_SYS_ADMIN and
 * CAP_SYS_RESOURCE. NOTE(review): permission-failure returns, the
 * locate failure check, put_vx_info() and the final return are
 * elided in this view. */
578 int vc_vx_info(uint32_t id, void __user *data)
581 struct vcmd_vx_info_v0 vc_data;
583 if (!vx_check(0, VX_ADMIN))
585 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
588 vxi = locate_vx_info(id);
592 vc_data.xid = vxi->vx_id;
593 vc_data.initpid = vxi->vx_initpid;
596 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
602 /* context functions */
/* vc_ctx_create()
 * syscall command: create (or locate) context 'xid' and migrate the
 * calling task into it. Static ids must be below MIN_D_CONTEXT;
 * VX_DYNAMIC_ID requests a dynamically assigned id.
 * NOTE(review): error returns and the handling of an already-set-up
 * context are partially elided in this view. */
604 int vc_ctx_create(uint32_t xid, void __user *data)
606 struct vx_info *new_vxi;
609 if (!capable(CAP_SYS_ADMIN))
/* reject ids in the dynamic range unless explicitly dynamic */
612 if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
618 new_vxi = __loc_vx_info(xid, &ret);
/* a context past its SETUP phase already exists and cannot be
 * "created" again */
621 if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
626 ret = new_vxi->vx_id;
627 vx_migrate_task(current, new_vxi);
628 /* if this fails, we might end up with a hashed vx_info */
630 put_vx_info(new_vxi);
/* vc_ctx_migrate()
 * syscall command: move the calling task into existing context 'id'.
 * NOTE(review): the spectator hack, locate failure check,
 * put_vx_info() and return are elided in this view. */
635 int vc_ctx_migrate(uint32_t id, void __user *data)
639 if (!capable(CAP_SYS_ADMIN))
642 /* dirty hack until Spectator becomes a cap */
648 vxi = locate_vx_info(id);
651 vx_migrate_task(current, vxi);
/* vc_get_cflags()
 * syscall command: return the context's flag word plus the mask of
 * flags userspace may still change (one-time flags are masked out
 * once set). NOTE(review): failure returns, put_vx_info() and the
 * final return are elided in this view. */
657 int vc_get_cflags(uint32_t id, void __user *data)
660 struct vcmd_ctx_flags_v0 vc_data;
662 if (!capable(CAP_SYS_ADMIN))
665 vxi = locate_vx_info(id);
669 vc_data.flagword = vxi->vx_flags;
671 /* special STATE flag handling */
672 vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
676 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_cflags()
 * syscall command: update the context's flag word. One-time flags
 * (VXF_ONE_TIME) can only be cleared, never set again; clearing
 * STATE_SETUP applies the bounding caps, clearing STATE_INIT on the
 * caller's own context records it as the context's init task.
 * NOTE(review): failure returns, put_vx_info() and the final return
 * are elided in this view. */
681 int vc_set_cflags(uint32_t id, void __user *data)
684 struct vcmd_ctx_flags_v0 vc_data;
685 uint64_t mask, trigger;
687 if (!capable(CAP_SYS_ADMIN))
689 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
692 vxi = locate_vx_info(id);
696 /* special STATE flag handling */
697 mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
/* 'trigger' holds the maskable flags that actually change value */
698 trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
700 if (trigger & VXF_STATE_SETUP)
701 vx_mask_bcaps(current);
702 if (trigger & VXF_STATE_INIT)
703 if (vxi == current->vx_info)
704 vx_set_init(vxi, current);
706 vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
707 vc_data.flagword, mask);
/* vc_get_ccaps()
 * syscall command: return the context's bounding (bcaps) and context
 * (ccaps) capability sets; cmask reports all ccaps as queryable.
 * NOTE(review): failure returns, put_vx_info() and the final return
 * are elided in this view. */
712 int vc_get_ccaps(uint32_t id, void __user *data)
715 struct vcmd_ctx_caps_v0 vc_data;
717 if (!capable(CAP_SYS_ADMIN))
720 vxi = locate_vx_info(id);
724 vc_data.bcaps = vxi->vx_bcaps;
725 vc_data.ccaps = vxi->vx_ccaps;
726 vc_data.cmask = ~0UL;
729 if (copy_to_user (data, &vc_data, sizeof(vc_data)))
/* vc_set_ccaps()
 * syscall command: update the context's capability sets. Bounding
 * caps can only be reduced (&=), never regained; context caps are
 * updated under the user-supplied mask.
 * NOTE(review): failure returns, put_vx_info() and the final return
 * are elided in this view. */
734 int vc_set_ccaps(uint32_t id, void __user *data)
737 struct vcmd_ctx_caps_v0 vc_data;
739 if (!capable(CAP_SYS_ADMIN))
741 if (copy_from_user (&vc_data, data, sizeof(vc_data)))
744 vxi = locate_vx_info(id);
748 vxi->vx_bcaps &= vc_data.bcaps;
749 vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
750 vc_data.ccaps, vc_data.cmask);
755 #include <linux/module.h>
757 // EXPORT_SYMBOL_GPL(rcu_free_vx_info);
758 EXPORT_SYMBOL_GPL(free_vx_info);
759 EXPORT_SYMBOL_GPL(vx_info_hash_lock);
760 EXPORT_SYMBOL_GPL(unhash_vx_info);