X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fvserver%2Fcontext.c;h=edcfebbbd6eeb9445999164a3edc074332051ab9;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=533e104396b0d4c035f38d7a5eb9a5b94ea76339;hpb=9213980e6a70d8473e0ffd4b39ab5b6caaba9ff5;p=linux-2.6.git diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c index 533e10439..edcfebbbd 100644 --- a/kernel/vserver/context.c +++ b/kernel/vserver/context.c @@ -3,7 +3,7 @@ * * Virtual Server: Context Support * - * Copyright (C) 2003-2004 Herbert Pötzl + * Copyright (C) 2003-2005 Herbert Pötzl * * V0.01 context helper * V0.02 vx_ctx_kill syscall command @@ -13,21 +13,33 @@ * V0.06 task_xid and info commands * V0.07 context flags and caps * V0.08 switch to RCU based hash + * V0.09 revert to non RCU for now + * V0.10 and back to working RCU hash + * V0.11 and back to locking again * */ #include #include -#include +#include +#include + +#include +#include #include -#include +#include +#include + #include -#include -#include -#include +#include +#include #include +#include "cvirt_init.h" +#include "limit_init.h" +#include "sched_init.h" + /* __alloc_vx_info() @@ -37,8 +49,8 @@ static struct vx_info *__alloc_vx_info(xid_t xid) { struct vx_info *new = NULL; - - vxdprintk("alloc_vx_info(%d)\n", xid); + + vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid); /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); @@ -47,10 +59,14 @@ static struct vx_info *__alloc_vx_info(xid_t xid) memset (new, 0, sizeof(struct vx_info)); new->vx_id = xid; - INIT_RCU_HEAD(&new->vx_rcu); + // INIT_RCU_HEAD(&new->vx_rcu); INIT_HLIST_NODE(&new->vx_hlist); - atomic_set(&new->vx_refcnt, 0); atomic_set(&new->vx_usecnt, 0); + atomic_set(&new->vx_tasks, 0); + new->vx_parent = NULL; + new->vx_state = 0; + new->vx_lock = SPIN_LOCK_UNLOCKED; + init_waitqueue_head(&new->vx_exit); /* rest of init goes here */ vx_info_init_limit(&new->limit); @@ -58,11 +74,14 @@ static struct vx_info *__alloc_vx_info(xid_t xid) vx_info_init_cvirt(&new->cvirt); vx_info_init_cacct(&new->cacct); + new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT; new->vx_bcaps = CAP_INIT_EFF_SET; new->vx_ccaps = 0; - vxdprintk("alloc_vx_info(%d) = %p\n", xid, new); + vxdprintk(VXD_CBIT(xid, 0), + "alloc_vx_info(%d) = %p", xid, new); + vxh_alloc_vx_info(new); return new; } @@ -72,31 +91,61 @@ static struct vx_info *__alloc_vx_info(xid_t xid) static void __dealloc_vx_info(struct vx_info *vxi) { - vxdprintk("dealloc_vx_info(%p)\n", vxi); + vxdprintk(VXD_CBIT(xid, 0), + "dealloc_vx_info(%p)", vxi); + vxh_dealloc_vx_info(vxi); vxi->vx_hlist.next = LIST_POISON1; vxi->vx_id = -1; - if (vxi->vx_namespace) - put_namespace(vxi->vx_namespace); - if (vxi->vx_fs) - put_fs_struct(vxi->vx_fs); - vx_info_exit_limit(&vxi->limit); vx_info_exit_sched(&vxi->sched); vx_info_exit_cvirt(&vxi->cvirt); vx_info_exit_cacct(&vxi->cacct); - - BUG_ON(atomic_read(&vxi->vx_usecnt)); - BUG_ON(atomic_read(&vxi->vx_refcnt)); + vxi->vx_state |= VXS_RELEASED; kfree(vxi); } +void __shutdown_vx_info(struct vx_info *vxi) +{ + struct namespace *namespace; + struct fs_struct *fs; + + might_sleep(); + + namespace = xchg(&vxi->vx_namespace, NULL); + if (namespace) + put_namespace(namespace); + + fs = xchg(&vxi->vx_fs, NULL); + if (fs) + put_fs_struct(fs); +} + +/* exported stuff */ + +void free_vx_info(struct vx_info *vxi) +{ + /* context shutdown is mandatory */ + // BUG_ON(vxi->vx_state != VXS_SHUTDOWN); + + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_tasks)); + + BUG_ON(vx_info_state(vxi, VXS_HASHED)); + // BUG_ON(!vx_state(vxi, VXS_DEFUNCT)); + + BUG_ON(vxi->vx_namespace); + BUG_ON(vxi->vx_fs); + + __dealloc_vx_info(vxi); +} + /* hash table for vx_info hash */ -#define VX_HASH_SIZE 13 +#define VX_HASH_SIZE 13 struct hlist_head vx_info_hash[VX_HASH_SIZE]; @@ -118,11 +167,19 @@ static inline unsigned int __hashval(xid_t xid) static inline void __hash_vx_info(struct vx_info *vxi) { struct hlist_head *head; - - vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_hash_vx_info(vxi); + + /* context must not be hashed */ + BUG_ON(vxi->vx_state & VXS_HASHED); + get_vx_info(vxi); + vxi->vx_state |= VXS_HASHED; head = &vx_info_hash[__hashval(vxi->vx_id)]; - hlist_add_head_rcu(&vxi->vx_hlist, head); + hlist_add_head(&vxi->vx_hlist, head); } /* __unhash_vx_info() @@ -132,31 +189,46 @@ static inline void __hash_vx_info(struct vx_info *vxi) static inline void __unhash_vx_info(struct vx_info *vxi) { - vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); - hlist_del_rcu(&vxi->vx_hlist); + vxd_assert_lock(&vx_info_hash_lock); + vxdprintk(VXD_CBIT(xid, 4), + "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id); + vxh_unhash_vx_info(vxi); + + /* maybe warn on that? */ + if (!(vxi->vx_state & VXS_HASHED)) + return; + + vxi->vx_state &= ~VXS_HASHED; + hlist_del(&vxi->vx_hlist); put_vx_info(vxi); } /* __lookup_vx_info() - * requires the rcu_read_lock() + * requires the hash_lock to be held * doesn't increment the vx_refcnt */ static inline struct vx_info *__lookup_vx_info(xid_t xid) { struct hlist_head *head = &vx_info_hash[__hashval(xid)]; struct hlist_node *pos; + struct vx_info *vxi; - hlist_for_each_rcu(pos, head) { - struct vx_info *vxi = - hlist_entry(pos, struct vx_info, vx_hlist); + vxd_assert_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { + vxi = hlist_entry(pos, struct vx_info, vx_hlist); - if (vxi->vx_id == xid) { - return vxi; - } + if (vxi->vx_id == xid) + goto found; } - return NULL; + vxi = NULL; +found: + vxdprintk(VXD_CBIT(xid, 0), + "__lookup_vx_info(#%u): %p[#%u]", + xid, vxi, vxi?vxi->vx_id:0); + vxh_lookup_vx_info(xid, vxi); + return vxi; } @@ -169,16 +241,22 @@ static inline xid_t __vx_dynamic_id(void) { static xid_t seq = MAX_S_CONTEXT; xid_t barrier = seq; - + + vxd_assert_lock(&vx_info_hash_lock); do { if (++seq > MAX_S_CONTEXT) seq = MIN_D_CONTEXT; - if (!__lookup_vx_info(seq)) + if (!__lookup_vx_info(seq)) { + vxdprintk(VXD_CBIT(xid, 4), + "__vx_dynamic_id: [#%d]", seq); return seq; + } } while (barrier != seq); return 0; } +#ifdef CONFIG_VSERVER_LEGACY + /* __loc_vx_info() * locate or create the requested context @@ -187,14 +265,15 @@ static inline xid_t __vx_dynamic_id(void) static struct vx_info * __loc_vx_info(int id, int *err) { struct vx_info *new, *vxi = NULL; - - vxdprintk("loc_vx_info(%d)\n", id); + + vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id); if (!(new = __alloc_vx_info(id))) { *err = -ENOMEM; return NULL; } + /* required to make dynamic xids unique */ spin_lock(&vx_info_hash_lock); /* dynamic context requested */ @@ -210,11 +289,13 @@ static struct vx_info * __loc_vx_info(int id, int *err) else if ((vxi = __lookup_vx_info(id))) { /* context in setup is not available */ if (vxi->vx_flags & VXF_STATE_SETUP) { - vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (not available)", id, vxi); vxi = NULL; *err = -EBUSY; } else { - vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (found)", id, vxi); get_vx_info(vxi); *err = 0; } @@ -222,92 +303,130 @@ static struct vx_info * __loc_vx_info(int id, int *err) } /* new context requested */ - vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new); + vxdprintk(VXD_CBIT(xid, 0), + "loc_vx_info(%d) = %p (new)", id, new); __hash_vx_info(get_vx_info(new)); vxi = new, new = NULL; *err = 1; out_unlock: spin_unlock(&vx_info_hash_lock); + vxh_loc_vx_info(id, vxi); if (new) __dealloc_vx_info(new); return vxi; } +#endif +/* __create_vx_info() -/* exported stuff */ + * create the requested context + * get() it and hash it */ +static struct vx_info * __create_vx_info(int id) +{ + struct vx_info *new, *vxi = NULL; + vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id); -void rcu_free_vx_info(void *obj) -{ - struct vx_info *vxi = obj; - int usecnt, refcnt; + if (!(new = __alloc_vx_info(id))) { + return ERR_PTR(-ENOMEM); + } - BUG_ON(!vxi); + /* required to make dynamic xids unique */ + spin_lock(&vx_info_hash_lock); - usecnt = atomic_read(&vxi->vx_usecnt); - BUG_ON(usecnt < 0); + /* dynamic context requested */ + if (id == VX_DYNAMIC_ID) { + id = __vx_dynamic_id(); + if (!id) { + printk(KERN_ERR "no dynamic context available.\n"); + vxi = ERR_PTR(-EAGAIN); + goto out_unlock; + } + new->vx_id = id; + } + /* existing context requested */ + else if ((vxi = __lookup_vx_info(id))) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (already there)", id, vxi); + if (vx_info_flags(vxi, VXF_STATE_SETUP, 0)) + vxi = ERR_PTR(-EBUSY); + else + vxi = ERR_PTR(-EEXIST); + goto out_unlock; + } + /* dynamic xid creation blocker */ + else if (id >= MIN_D_CONTEXT) { + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) (dynamic rejected)", id); + vxi = ERR_PTR(-EINVAL); + goto out_unlock; + } - refcnt = atomic_read(&vxi->vx_refcnt); - BUG_ON(refcnt < 0); + /* new context requested */ + vxdprintk(VXD_CBIT(xid, 0), + "create_vx_info(%d) = %p (new)", id, new); + __hash_vx_info(get_vx_info(new)); + vxi = new, new = NULL; - if (!usecnt) - __dealloc_vx_info(vxi); - else - printk("!!! rcu didn't free\n"); +out_unlock: + spin_unlock(&vx_info_hash_lock); + vxh_create_vx_info(id, IS_ERR(vxi)?NULL:vxi); + if (new) + __dealloc_vx_info(new); + return vxi; } + +/* exported stuff */ + + void unhash_vx_info(struct vx_info *vxi) { + __shutdown_vx_info(vxi); spin_lock(&vx_info_hash_lock); __unhash_vx_info(vxi); spin_unlock(&vx_info_hash_lock); } + /* locate_vx_info() - * search for a vx_info and get() it + * search for a vx_info and get() it * negative id means current */ struct vx_info *locate_vx_info(int id) { - struct vx_info *vxi; - + struct vx_info *vxi = NULL; + if (id < 0) { vxi = get_vx_info(current->vx_info); - } else { - rcu_read_lock(); + } else if (id > 1) { + spin_lock(&vx_info_hash_lock); vxi = get_vx_info(__lookup_vx_info(id)); - rcu_read_unlock(); + spin_unlock(&vx_info_hash_lock); } return vxi; } -/* vx_info_is_hashed() +/* xid_is_hashed() * verify that xid is still hashed */ -int vx_info_is_hashed(xid_t xid) +int xid_is_hashed(xid_t xid) { int hashed; - rcu_read_lock(); + spin_lock(&vx_info_hash_lock); hashed = (__lookup_vx_info(xid) != NULL); - rcu_read_unlock(); + spin_unlock(&vx_info_hash_lock); return hashed; } #ifdef CONFIG_VSERVER_LEGACY -#if 0 -struct vx_info *alloc_vx_info(xid_t xid) -{ - return __alloc_vx_info(xid); -} -#endif - struct vx_info *locate_or_create_vx_info(int id) { int err; @@ -319,33 +438,32 @@ struct vx_info *locate_or_create_vx_info(int id) #ifdef CONFIG_PROC_FS -#define hlist_for_each_rcu(pos, head) \ - for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ - pos = pos->next, ({ smp_read_barrier_depends(); 0;})) - int get_xid_list(int index, unsigned int *xids, int size) { int hindex, nr_xids = 0; - rcu_read_lock(); for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { struct hlist_head *head = &vx_info_hash[hindex]; struct hlist_node *pos; - hlist_for_each_rcu(pos, head) { + spin_lock(&vx_info_hash_lock); + hlist_for_each(pos, head) { struct vx_info *vxi; if (--index > 0) continue; vxi = hlist_entry(pos, struct vx_info, vx_hlist); - xids[nr_xids] = vxi->vx_id; - if (++nr_xids >= size) + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) { + spin_unlock(&vx_info_hash_lock); goto out; + } } + /* keep the lock time short */ + spin_unlock(&vx_info_hash_lock); } out: - rcu_read_unlock(); return nr_xids; } #endif @@ -353,7 +471,7 @@ out: int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) { struct user_struct *new_user, *old_user; - + if (!p || !vxi) BUG(); new_user = alloc_uid(vxi->vx_id, p->uid); @@ -382,29 +500,7 @@ void vx_mask_bcaps(struct task_struct *p) #include -static inline int vx_nofiles_task(struct task_struct *tsk) -{ - struct files_struct *files = tsk->files; - const unsigned long *obptr, *cbptr; - int count, total; - - spin_lock(&files->file_lock); - obptr = files->open_fds->fds_bits; - cbptr = files->close_on_exec->fds_bits; - count = files->max_fds / (sizeof(unsigned long) * 8); - for (total = 0; count > 0; count--) { - if (*obptr) - total += hweight_long(*obptr); - obptr++; - /* if (*cbptr) - total += hweight_long(*cbptr); - cbptr++; */ - } - spin_unlock(&files->file_lock); - return total; -} - -static inline int vx_openfd_task(struct task_struct *tsk) +static int vx_openfd_task(struct task_struct *tsk) { struct files_struct *files = tsk->files; const unsigned long *bptr; @@ -431,7 +527,7 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) { struct vx_info *old_vxi; int ret = 0; - + if (!p || !vxi) BUG(); @@ -439,36 +535,43 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) if (old_vxi == vxi) goto out; - vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxdprintk(VXD_CBIT(xid, 5), + "vx_migrate_task(%p,%p[#%d.%d])", p, vxi, vxi->vx_id, atomic_read(&vxi->vx_usecnt)); if (!(ret = vx_migrate_user(p, vxi))) { - int openfd, nofiles; + int openfd; task_lock(p); openfd = vx_openfd_task(p); - nofiles = vx_nofiles_task(p); if (old_vxi) { - atomic_dec(&old_vxi->cacct.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_threads); + atomic_dec(&old_vxi->cvirt.nr_running); atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]); - atomic_sub(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]); - atomic_sub(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); - } - atomic_inc(&vxi->cacct.nr_threads); + /* FIXME: what about the struct files here? */ + atomic_sub(openfd, &old_vxi->limit.rcur[VLIMIT_OPENFD]); + } + atomic_inc(&vxi->cvirt.nr_threads); + atomic_inc(&vxi->cvirt.nr_running); atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]); - atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]); - atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]); - /* should be handled in set_vx_info !! */ - if (old_vxi) + /* FIXME: what about the struct files here? */ + atomic_add(openfd, &vxi->limit.rcur[VLIMIT_OPENFD]); + + if (old_vxi) { + release_vx_info(old_vxi, p); clr_vx_info(&p->vx_info); + } + claim_vx_info(vxi, p); set_vx_info(&p->vx_info, vxi); p->xid = vxi->vx_id; + + vxdprintk(VXD_CBIT(xid, 5), + "moved task %p into vxi:%p[#%d]", + p, vxi, vxi->vx_id); + vx_mask_bcaps(p); task_unlock(p); - - /* obsoleted by clr/set */ - // put_vx_info(old_vxi); } out: put_vx_info(old_vxi); @@ -479,10 +582,14 @@ int vx_set_init(struct vx_info *vxi, struct task_struct *p) { if (!vxi) return -EINVAL; - if (vxi->vx_initpid) - return -EPERM; + if (vxi->vx_initpid) + return -EPERM; + + vxdprintk(VXD_CBIT(xid, 6), + "vx_set_init(%p[#%d],%p[#%d,%d,%d])", + vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid); - vxi->vx_initpid = p->tgid; + vxi->vx_initpid = p->tgid; return 0; } @@ -496,22 +603,22 @@ int vx_set_init(struct vx_info *vxi, struct task_struct *p) int vc_task_xid(uint32_t id, void __user *data) { - xid_t xid; - - if (id) { - struct task_struct *tsk; - - if (!vx_check(0, VX_ADMIN|VX_WATCH)) - return -EPERM; - - read_lock(&tasklist_lock); - tsk = find_task_by_pid(id); - xid = (tsk) ? tsk->xid : -ESRCH; - read_unlock(&tasklist_lock); - } - else - xid = current->xid; - return xid; + xid_t xid; + + if (id) { + struct task_struct *tsk; + + if (!vx_check(0, VX_ADMIN|VX_WATCH)) + return -EPERM; + + read_lock(&tasklist_lock); + tsk = find_task_by_real_pid(id); + xid = (tsk) ? tsk->xid : -ESRCH; + read_unlock(&tasklist_lock); + } + else + xid = vx_current_xid(); + return xid; } @@ -549,24 +656,19 @@ int vc_ctx_create(uint32_t xid, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID)) + if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID)) return -EINVAL; - if (xid < 1) + if (xid < 2) return -EINVAL; - new_vxi = __loc_vx_info(xid, &ret); - if (!new_vxi) - return ret; - if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) { - ret = -EEXIST; - goto out_put; - } + new_vxi = __create_vx_info(xid); + if (IS_ERR(new_vxi)) + return PTR_ERR(new_vxi); ret = new_vxi->vx_id; vx_migrate_task(current, new_vxi); /* if this fails, we might end up with a hashed vx_info */ -out_put: put_vx_info(new_vxi); return ret; } @@ -575,7 +677,7 @@ out_put: int vc_ctx_migrate(uint32_t id, void __user *data) { struct vx_info *vxi; - + if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -694,7 +796,5 @@ int vc_set_ccaps(uint32_t id, void __user *data) #include -EXPORT_SYMBOL_GPL(rcu_free_vx_info); -EXPORT_SYMBOL_GPL(vx_info_hash_lock); -EXPORT_SYMBOL_GPL(unhash_vx_info); +EXPORT_SYMBOL_GPL(free_vx_info);