X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=kernel%2Fvserver%2Fcontext.c;h=8b3cee7cc0630da8b2bd621668d6ca0396da1a6c;hb=d46bc780027c5439db9f72d42c0732775b53925a;hp=538834c57b69e6ae32796a5fb86b508b10790536;hpb=a8e794ca871505c8ea96cc102f4ad555c5231d7f;p=linux-2.6.git diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c index 538834c57..8b3cee7cc 100644 --- a/kernel/vserver/context.c +++ b/kernel/vserver/context.c @@ -12,6 +12,7 @@ * V0.05 rlimit basic implementation * V0.06 task_xid and info commands * V0.07 context flags and caps + * V0.08 switch to RCU based hash * */ @@ -22,38 +23,35 @@ #include #include #include +#include #include -/* system functions */ +/* __alloc_vx_info() + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ -LIST_HEAD(vx_infos); - -spinlock_t vxlist_lock - __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; - - -/* - * struct vx_info allocation and deallocation - */ - -static struct vx_info *alloc_vx_info(int id) +static struct vx_info *__alloc_vx_info(xid_t xid) { struct vx_info *new = NULL; - vxdprintk("alloc_vx_info(%d)\n", id); + vxdprintk("alloc_vx_info(%d)\n", xid); + /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); if (!new) return 0; memset (new, 0, sizeof(struct vx_info)); - new->vx_id = id; - INIT_LIST_HEAD(&new->vx_list); + new->vx_id = xid; + INIT_RCU_HEAD(&new->vx_rcu); + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_refcnt, 0); + atomic_set(&new->vx_usecnt, 0); + /* rest of init goes here */ - vx_info_init_limit(&new->limit); vx_info_init_sched(&new->sched); vx_info_init_cvirt(&new->cvirt); @@ -63,13 +61,21 @@ static struct vx_info *alloc_vx_info(int id) new->vx_bcaps = CAP_INIT_EFF_SET; new->vx_ccaps = 0; - vxdprintk("alloc_vx_info(%d) = %p\n", id, new); + vxdprintk("alloc_vx_info(%d) = %p\n", xid, new); return new; } -void free_vx_info(struct vx_info *vxi) +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) { - vxdprintk("free_vx_info(%p)\n", vxi); + vxdprintk("dealloc_vx_info(%p)\n", vxi); + + vxi->vx_hlist.next = LIST_POISON1; + vxi->vx_id = -1; + if (vxi->vx_namespace) put_namespace(vxi->vx_namespace); if (vxi->vx_fs) @@ -80,69 +86,85 @@ void free_vx_info(struct vx_info *vxi) vx_info_exit_cvirt(&vxi->cvirt); vx_info_exit_cacct(&vxi->cacct); - BUG_ON(atomic_read(&vxi->vx_refcount)); - vxi->vx_id = -1; + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_refcnt)); kfree(vxi); } -/* - * struct vx_info search by id - * assumes vxlist_lock is held - */ +/* hash table for vx_info hash */ -static __inline__ struct vx_info *__find_vx_info(int id) -{ - struct vx_info *vxi; +#define VX_HASH_SIZE 13 - list_for_each_entry(vxi, &vx_infos, vx_list) - if (vxi->vx_id == id) - return vxi; - return 0; +struct hlist_head vx_info_hash[VX_HASH_SIZE]; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); } -/* - * struct vx_info ref stuff - */ -struct vx_info *find_vx_info(int id) +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) { - struct vx_info *vxi; + struct hlist_head *head; - if (id < 0) { - vxi = current->vx_info; - get_vx_info(vxi); - } else { - spin_lock(&vxlist_lock); - if ((vxi = __find_vx_info(id))) - get_vx_info(vxi); - spin_unlock(&vxlist_lock); - } - return vxi; + vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + get_vx_info(vxi); + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head_rcu(&vxi->vx_hlist, head); } -/* - * verify that id is a valid xid - */ +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ -int vx_info_id_valid(int id) +static inline void __unhash_vx_info(struct vx_info *vxi) { - int valid; + vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + hlist_del_rcu(&vxi->vx_hlist); + put_vx_info(vxi); +} + - spin_lock(&vxlist_lock); - valid = (__find_vx_info(id) != NULL); - spin_unlock(&vxlist_lock); - return valid; +/* __lookup_vx_info() + + * requires the rcu_read_lock() + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + + hlist_for_each(pos, head) { + struct vx_info *vxi = + hlist_entry(pos, struct vx_info, vx_hlist); + + if (vxi->vx_id == xid) { + return vxi; + } + } + return NULL; } -/* - * dynamic context id ... - */ +/* __vx_dynamic_id() -static __inline__ xid_t __vx_dynamic_id(void) + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t __vx_dynamic_id(void) { static xid_t seq = MAX_S_CONTEXT; xid_t barrier = seq; @@ -150,29 +172,29 @@ static __inline__ xid_t __vx_dynamic_id(void) do { if (++seq > MAX_S_CONTEXT) seq = MIN_D_CONTEXT; - if (!__find_vx_info(seq)) + if (!__lookup_vx_info(seq)) return seq; } while (barrier != seq); return 0; } -static struct vx_info * __foc_vx_info(int id, int *err) +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) { struct vx_info *new, *vxi = NULL; - vxdprintk("foc_vx_info(%d)\n", id); - if (!(new = alloc_vx_info(id))) { - *err = -ENOMEM; - return NULL; - } + vxdprintk("loc_vx_info(%d)\n", id); - /* dirty hack until Spectator becomes a cap */ - if (id == 0 || id == 1) { - *err = -EBUSY; + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; return NULL; } - spin_lock(&vxlist_lock); + spin_lock(&vx_info_hash_lock); /* dynamic context requested */ if (id == VX_DYNAMIC_ID) { @@ -184,14 +206,14 @@ static struct vx_info * __foc_vx_info(int id, int *err) new->vx_id = id; } /* existing context requested */ - else if ((vxi = __find_vx_info(id))) { + else if ((vxi = __lookup_vx_info(id))) { /* context in setup is not available */ if (vxi->vx_flags & VXF_STATE_SETUP) { - vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi); + vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi); vxi = NULL; *err = -EBUSY; } else { - vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi); + vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi); get_vx_info(vxi); *err = 0; } @@ -199,27 +221,131 @@ static struct vx_info * __foc_vx_info(int id, int *err) } /* new context requested */ - vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new); - atomic_set(&new->vx_refcount, 1); - list_add(&new->vx_list, &vx_infos); + vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new); + __hash_vx_info(get_vx_info(new)); vxi = new, new = NULL; *err = 1; out_unlock: - spin_unlock(&vxlist_lock); + spin_unlock(&vx_info_hash_lock); if (new) - free_vx_info(new); + __dealloc_vx_info(new); + return vxi; +} + + + +/* exported stuff */ + + + +void rcu_free_vx_info(void *obj) +{ + struct vx_info *vxi = obj; + int usecnt, refcnt; + + usecnt = atomic_read(&vxi->vx_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&vxi->vx_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_vx_info(vxi); + else + printk("!!! rcu didn't free\n"); +} + +void unhash_vx_info(struct vx_info *vxi) +{ + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); +} + +/* locate_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *locate_vx_info(int id) +{ + struct vx_info *vxi; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else { + rcu_read_lock(); + vxi = get_vx_info(__lookup_vx_info(id)); + rcu_read_unlock(); + } return vxi; } +/* vx_info_is_hashed() + + * verify that xid is still hashed */ + +int vx_info_is_hashed(xid_t xid) +{ + int hashed; + + rcu_read_lock(); + hashed = (__lookup_vx_info(xid) != NULL); + rcu_read_unlock(); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY -struct vx_info *find_or_create_vx_info(int id) +#if 0 +struct vx_info *alloc_vx_info(xid_t xid) +{ + return __alloc_vx_info(xid); +} +#endif + +struct vx_info *locate_or_create_vx_info(int id) { int err; - return __foc_vx_info(id, &err); + return __loc_vx_info(id, &err); } +#endif + +#ifdef CONFIG_PROC_FS + +#define hlist_for_each_rcu(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ + pos = pos->next, ({ smp_read_barrier_depends(); 0;})) + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + rcu_read_lock(); + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) + goto out; + } + } +out: + rcu_read_unlock(); + return nr_xids; +} +#endif int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) { @@ -300,17 +426,19 @@ static inline int vx_openfd_task(struct task_struct *tsk) int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) { - struct vx_info *old_vxi = task_get_vx_info(p); + struct vx_info *old_vxi; int ret = 0; if (!p || !vxi) BUG(); - vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, - vxi->vx_id, atomic_read(&vxi->vx_refcount)); + old_vxi = task_get_vx_info(p); if (old_vxi == vxi) goto out; + vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + if (!(ret = vx_migrate_user(p, vxi))) { task_lock(p); if (old_vxi) { @@ -321,6 +449,9 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) atomic_inc(&vxi->limit.res[RLIMIT_NPROC]); atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]); atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]); + /* should be handled in set_vx_info !! */ + if (old_vxi) + clr_vx_info(&p->vx_info); set_vx_info(&p->vx_info, vxi); p->xid = vxi->vx_id; vx_mask_bcaps(p); @@ -383,7 +514,7 @@ int vc_vx_info(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -401,7 +532,6 @@ int vc_vx_info(uint32_t id, void __user *data) int vc_ctx_create(uint32_t xid, void __user *data) { - // int ret = -ENOMEM; struct vx_info *new_vxi; int ret; @@ -414,7 +544,7 @@ int vc_ctx_create(uint32_t xid, void __user *data) if (xid < 1) return -EINVAL; - new_vxi = __foc_vx_info(xid, &ret); + new_vxi = __loc_vx_info(xid, &ret); if (!new_vxi) return ret; if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) { @@ -424,6 +554,7 @@ int vc_ctx_create(uint32_t xid, void __user *data) ret = new_vxi->vx_id; vx_migrate_task(current, new_vxi); + /* if this fails, we might end up with a hashed vx_info */ out_put: put_vx_info(new_vxi); return ret; @@ -443,7 +574,7 @@ int vc_ctx_migrate(uint32_t id, void __user *data) return 0; } - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; vx_migrate_task(current, vxi); @@ -460,13 +591,12 @@ int vc_get_cflags(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; vc_data.flagword = vxi->vx_flags; - // vc_data.mask = ~0UL; /* special STATE flag handling */ vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); @@ -488,7 +618,7 @@ int vc_set_cflags(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -516,7 +646,7 @@ int vc_get_ccaps(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -540,7 +670,7 @@ int vc_set_ccaps(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -553,6 +683,6 @@ int vc_set_ccaps(uint32_t id, void __user *data) #include -EXPORT_SYMBOL_GPL(free_vx_info); -EXPORT_SYMBOL_GPL(vxlist_lock); +EXPORT_SYMBOL_GPL(rcu_free_vx_info); +EXPORT_SYMBOL_GPL(vx_info_hash_lock);