Merge to Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.13-vs2...
diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c
index 533e104..55a7062 100644
--- a/kernel/vserver/context.c
+++ b/kernel/vserver/context.c
@@ -3,7 +3,7 @@
  *
  *  Virtual Server: Context Support
  *
- *  Copyright (C) 2003-2004  Herbert Pötzl
+ *  Copyright (C) 2003-2005  Herbert Pötzl
  *
  *  V0.01  context helper
  *  V0.02  vx_ctx_kill syscall command
  *  V0.06  task_xid and info commands
  *  V0.07  context flags and caps
  *  V0.08  switch to RCU based hash
+ *  V0.09  revert to non RCU for now
+ *  V0.10  and back to working RCU hash
+ *  V0.11  and back to locking again
  *
  */
 
-#include <linux/config.h>
 #include <linux/slab.h>
-#include <linux/vserver.h>
+#include <linux/types.h>
+#include <linux/namespace.h>
+
+#include <linux/sched.h>
+#include <linux/vserver/network.h>
 #include <linux/vserver/legacy.h>
-#include <linux/vs_base.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/debug.h>
+#include <linux/vserver/limit_int.h>
+
 #include <linux/vs_context.h>
-#include <linux/kernel_stat.h>
-#include <linux/namespace.h>
-#include <linux/rcupdate.h>
+#include <linux/vs_limit.h>
+#include <linux/vserver/context_cmd.h>
 
+#include <linux/err.h>
 #include <asm/errno.h>
 
+#include "cvirt_init.h"
+#include "limit_init.h"
+#include "sched_init.h"
+
 
 /*     __alloc_vx_info()
 
@@ -37,8 +50,8 @@
 static struct vx_info *__alloc_vx_info(xid_t xid)
 {
        struct vx_info *new = NULL;
-       
-       vxdprintk("alloc_vx_info(%d)\n", xid);
+
+       vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
 
        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
@@ -47,10 +60,16 @@ static struct vx_info *__alloc_vx_info(xid_t xid)
 
        memset (new, 0, sizeof(struct vx_info));
        new->vx_id = xid;
-       INIT_RCU_HEAD(&new->vx_rcu);
        INIT_HLIST_NODE(&new->vx_hlist);
-       atomic_set(&new->vx_refcnt, 0);
        atomic_set(&new->vx_usecnt, 0);
+       atomic_set(&new->vx_tasks, 0);
+       new->vx_parent = NULL;
+       new->vx_state = 0;
+       init_waitqueue_head(&new->vx_wait);
+
+       /* prepare reaper */
+       get_task_struct(child_reaper);
+       new->vx_reaper = child_reaper;
 
        /* rest of init goes here */
        vx_info_init_limit(&new->limit);
@@ -58,11 +77,16 @@ static struct vx_info *__alloc_vx_info(xid_t xid)
        vx_info_init_cvirt(&new->cvirt);
        vx_info_init_cacct(&new->cacct);
 
-       new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
+       new->vx_flags = VXF_INIT_SET;
        new->vx_bcaps = CAP_INIT_EFF_SET;
        new->vx_ccaps = 0;
 
-       vxdprintk("alloc_vx_info(%d) = %p\n", xid, new);
+       new->reboot_cmd = 0;
+       new->exit_code = 0;
+
+       vxdprintk(VXD_CBIT(xid, 0),
+               "alloc_vx_info(%d) = %p", xid, new);
+       vxh_alloc_vx_info(new);
        return new;
 }
 
@@ -72,31 +96,63 @@ static struct vx_info *__alloc_vx_info(xid_t xid)
 
 static void __dealloc_vx_info(struct vx_info *vxi)
 {
-       vxdprintk("dealloc_vx_info(%p)\n", vxi);
+       vxdprintk(VXD_CBIT(xid, 0),
+               "dealloc_vx_info(%p)", vxi);
+       vxh_dealloc_vx_info(vxi);
 
        vxi->vx_hlist.next = LIST_POISON1;
        vxi->vx_id = -1;
 
-       if (vxi->vx_namespace)
-               put_namespace(vxi->vx_namespace);
-       if (vxi->vx_fs)
-               put_fs_struct(vxi->vx_fs);
-       
        vx_info_exit_limit(&vxi->limit);
        vx_info_exit_sched(&vxi->sched);
        vx_info_exit_cvirt(&vxi->cvirt);
        vx_info_exit_cacct(&vxi->cacct);
-       
-       BUG_ON(atomic_read(&vxi->vx_usecnt));
-       BUG_ON(atomic_read(&vxi->vx_refcnt));
 
+       vxi->vx_state |= VXS_RELEASED;
        kfree(vxi);
 }
 
+static void __shutdown_vx_info(struct vx_info *vxi)
+{
+       struct namespace *namespace;
+       struct fs_struct *fs;
+
+       might_sleep();
+
+       vxi->vx_state |= VXS_SHUTDOWN;
+       vs_state_change(vxi, VSC_SHUTDOWN);
+
+       namespace = xchg(&vxi->vx_namespace, NULL);
+       if (namespace)
+               put_namespace(namespace);
+
+       fs = xchg(&vxi->vx_fs, NULL);
+       if (fs)
+               put_fs_struct(fs);
+}
+
+/* exported stuff */
+
+void free_vx_info(struct vx_info *vxi)
+{
+       /* context shutdown is mandatory */
+       BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
+
+       BUG_ON(atomic_read(&vxi->vx_usecnt));
+       BUG_ON(atomic_read(&vxi->vx_tasks));
+
+       BUG_ON(vx_info_state(vxi, VXS_HASHED));
+
+       BUG_ON(vxi->vx_namespace);
+       BUG_ON(vxi->vx_fs);
+
+       __dealloc_vx_info(vxi);
+}
+
 
 /*     hash table for vx_info hash */
 
-#define        VX_HASH_SIZE    13
+#define VX_HASH_SIZE   13
 
 struct hlist_head vx_info_hash[VX_HASH_SIZE];
 
@@ -118,11 +174,18 @@ static inline unsigned int __hashval(xid_t xid)
 static inline void __hash_vx_info(struct vx_info *vxi)
 {
        struct hlist_head *head;
-       
-       vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
-       get_vx_info(vxi);
+
+       vxd_assert_lock(&vx_info_hash_lock);
+       vxdprintk(VXD_CBIT(xid, 4),
+               "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+       vxh_hash_vx_info(vxi);
+
+       /* context must not be hashed */
+       BUG_ON(vx_info_state(vxi, VXS_HASHED));
+
+       vxi->vx_state |= VXS_HASHED;
        head = &vx_info_hash[__hashval(vxi->vx_id)];
-       hlist_add_head_rcu(&vxi->vx_hlist, head);
+       hlist_add_head(&vxi->vx_hlist, head);
 }
 
 /*     __unhash_vx_info()
@@ -132,31 +195,44 @@ static inline void __hash_vx_info(struct vx_info *vxi)
 
 static inline void __unhash_vx_info(struct vx_info *vxi)
 {
-       vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
-       hlist_del_rcu(&vxi->vx_hlist);
-       put_vx_info(vxi);
+       vxd_assert_lock(&vx_info_hash_lock);
+       vxdprintk(VXD_CBIT(xid, 4),
+               "__unhash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+       vxh_unhash_vx_info(vxi);
+
+       /* context must be hashed */
+       BUG_ON(!vx_info_state(vxi, VXS_HASHED));
+
+       vxi->vx_state &= ~VXS_HASHED;
+       hlist_del(&vxi->vx_hlist);
 }
 
 
 /*     __lookup_vx_info()
 
-       * requires the rcu_read_lock()
+       * requires the hash_lock to be held
        * doesn't increment the vx_refcnt                       */
 
 static inline struct vx_info *__lookup_vx_info(xid_t xid)
 {
        struct hlist_head *head = &vx_info_hash[__hashval(xid)];
        struct hlist_node *pos;
+       struct vx_info *vxi;
 
-       hlist_for_each_rcu(pos, head) {
-               struct vx_info *vxi =
-                       hlist_entry(pos, struct vx_info, vx_hlist);
+       vxd_assert_lock(&vx_info_hash_lock);
+       hlist_for_each(pos, head) {
+               vxi = hlist_entry(pos, struct vx_info, vx_hlist);
 
-               if (vxi->vx_id == xid) {
-                       return vxi;
-               }
+               if (vxi->vx_id == xid)
+                       goto found;
        }
-       return NULL;
+       vxi = NULL;
+found:
+       vxdprintk(VXD_CBIT(xid, 0),
+               "__lookup_vx_info(#%u): %p[#%u]",
+               xid, vxi, vxi?vxi->vx_id:0);
+       vxh_lookup_vx_info(vxi, xid);
+       return vxi;
 }
 
 
@@ -169,16 +245,22 @@ static inline xid_t __vx_dynamic_id(void)
 {
        static xid_t seq = MAX_S_CONTEXT;
        xid_t barrier = seq;
-       
+
+       vxd_assert_lock(&vx_info_hash_lock);
        do {
                if (++seq > MAX_S_CONTEXT)
                        seq = MIN_D_CONTEXT;
-               if (!__lookup_vx_info(seq))
+               if (!__lookup_vx_info(seq)) {
+                       vxdprintk(VXD_CBIT(xid, 4),
+                               "__vx_dynamic_id: [#%d]", seq);
                        return seq;
+               }
        } while (barrier != seq);
        return 0;
 }
 
+#ifdef CONFIG_VSERVER_LEGACY
+
 /*     __loc_vx_info()
 
        * locate or create the requested context
@@ -187,14 +269,15 @@ static inline xid_t __vx_dynamic_id(void)
 static struct vx_info * __loc_vx_info(int id, int *err)
 {
        struct vx_info *new, *vxi = NULL;
-       
-       vxdprintk("loc_vx_info(%d)\n", id);
+
+       vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
 
        if (!(new = __alloc_vx_info(id))) {
                *err = -ENOMEM;
                return NULL;
        }
 
+       /* required to make dynamic xids unique */
        spin_lock(&vx_info_hash_lock);
 
        /* dynamic context requested */
@@ -210,11 +293,13 @@ static struct vx_info * __loc_vx_info(int id, int *err)
        else if ((vxi = __lookup_vx_info(id))) {
                /* context in setup is not available */
                if (vxi->vx_flags & VXF_STATE_SETUP) {
-                       vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi);
+                       vxdprintk(VXD_CBIT(xid, 0),
+                               "loc_vx_info(%d) = %p (not available)", id, vxi);
                        vxi = NULL;
                        *err = -EBUSY;
                } else {
-                       vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi);
+                       vxdprintk(VXD_CBIT(xid, 0),
+                               "loc_vx_info(%d) = %p (found)", id, vxi);
                        get_vx_info(vxi);
                        *err = 0;
                }
@@ -222,93 +307,131 @@ static struct vx_info * __loc_vx_info(int id, int *err)
        }
 
        /* new context requested */
-       vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new);
+       vxdprintk(VXD_CBIT(xid, 0),
+               "loc_vx_info(%d) = %p (new)", id, new);
        __hash_vx_info(get_vx_info(new));
        vxi = new, new = NULL;
        *err = 1;
 
 out_unlock:
        spin_unlock(&vx_info_hash_lock);
+       vxh_loc_vx_info(vxi, id);
        if (new)
                __dealloc_vx_info(new);
        return vxi;
 }
 
+#endif
 
+/*     __create_vx_info()
 
-/*     exported stuff                                          */
+       * create the requested context
+       * get() and hash it                                     */
 
+static struct vx_info * __create_vx_info(int id)
+{
+       struct vx_info *new, *vxi = NULL;
 
+       vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
 
-void rcu_free_vx_info(void *obj)
-{
-       struct vx_info *vxi = obj;
-       int usecnt, refcnt;
+       if (!(new = __alloc_vx_info(id)))
+               return ERR_PTR(-ENOMEM);
 
-       BUG_ON(!vxi);
+       /* required to make dynamic xids unique */
+       spin_lock(&vx_info_hash_lock);
 
-       usecnt = atomic_read(&vxi->vx_usecnt);
-       BUG_ON(usecnt < 0);
+       /* dynamic context requested */
+       if (id == VX_DYNAMIC_ID) {
+               id = __vx_dynamic_id();
+               if (!id) {
+                       printk(KERN_ERR "no dynamic context available.\n");
+                       vxi = ERR_PTR(-EAGAIN);
+                       goto out_unlock;
+               }
+               new->vx_id = id;
+       }
+       /* static context requested */
+       else if ((vxi = __lookup_vx_info(id))) {
+               vxdprintk(VXD_CBIT(xid, 0),
+                       "create_vx_info(%d) = %p (already there)", id, vxi);
+               if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
+                       vxi = ERR_PTR(-EBUSY);
+               else
+                       vxi = ERR_PTR(-EEXIST);
+               goto out_unlock;
+       }
+       /* dynamic xid creation blocker */
+       else if (id >= MIN_D_CONTEXT) {
+               vxdprintk(VXD_CBIT(xid, 0),
+                       "create_vx_info(%d) (dynamic rejected)", id);
+               vxi = ERR_PTR(-EINVAL);
+               goto out_unlock;
+       }
 
-       refcnt = atomic_read(&vxi->vx_refcnt);
-       BUG_ON(refcnt < 0);
+       /* new context */
+       vxdprintk(VXD_CBIT(xid, 0),
+               "create_vx_info(%d) = %p (new)", id, new);
+       __hash_vx_info(get_vx_info(new));
+       vxi = new, new = NULL;
 
-       if (!usecnt)
-               __dealloc_vx_info(vxi);
-       else
-               printk("!!! rcu didn't free\n");
+out_unlock:
+       spin_unlock(&vx_info_hash_lock);
+       vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id);
+       if (new)
+               __dealloc_vx_info(new);
+       return vxi;
 }
 
+
+/*     exported stuff                                          */
+
+
 void unhash_vx_info(struct vx_info *vxi)
 {
+       __shutdown_vx_info(vxi);
        spin_lock(&vx_info_hash_lock);
        __unhash_vx_info(vxi);
        spin_unlock(&vx_info_hash_lock);
+       __wakeup_vx_info(vxi);
 }
 
-/*     locate_vx_info()
 
-       * search for a vx_info and get() it                     
+/*     lookup_vx_info()
+
+       * search for a vx_info and get() it
        * negative id means current                             */
 
-struct vx_info *locate_vx_info(int id)
+struct vx_info *lookup_vx_info(int id)
 {
-       struct vx_info *vxi;
-       
+       struct vx_info *vxi = NULL;
+
        if (id < 0) {
                vxi = get_vx_info(current->vx_info);
-       } else {
-               rcu_read_lock();
+       } else if (id > 1) {
+               spin_lock(&vx_info_hash_lock);
                vxi = get_vx_info(__lookup_vx_info(id));
-               rcu_read_unlock();
+               spin_unlock(&vx_info_hash_lock);
        }
        return vxi;
 }
 
-/*     vx_info_is_hashed()
+/*     xid_is_hashed()
 
        * verify that xid is still hashed                       */
 
-int vx_info_is_hashed(xid_t xid)
+int xid_is_hashed(xid_t xid)
 {
        int hashed;
 
-       rcu_read_lock();
+       spin_lock(&vx_info_hash_lock);
        hashed = (__lookup_vx_info(xid) != NULL);
-       rcu_read_unlock();
+       spin_unlock(&vx_info_hash_lock);
        return hashed;
 }
 
 #ifdef CONFIG_VSERVER_LEGACY
 
-#if 0
-struct vx_info *alloc_vx_info(xid_t xid)
-{
-       return __alloc_vx_info(xid);
-}
-#endif
-
-struct vx_info *locate_or_create_vx_info(int id)
+struct vx_info *lookup_or_create_vx_info(int id)
 {
        int err;
 
@@ -319,41 +442,41 @@ struct vx_info *locate_or_create_vx_info(int id)
 
 #ifdef CONFIG_PROC_FS
 
-#define hlist_for_each_rcu(pos, head) \
-        for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
-               pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
-
 int get_xid_list(int index, unsigned int *xids, int size)
 {
        int hindex, nr_xids = 0;
 
-       rcu_read_lock();
        for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
                struct hlist_head *head = &vx_info_hash[hindex];
                struct hlist_node *pos;
 
-               hlist_for_each_rcu(pos, head) {
+               spin_lock(&vx_info_hash_lock);
+               hlist_for_each(pos, head) {
                        struct vx_info *vxi;
 
                        if (--index > 0)
                                continue;
 
                        vxi = hlist_entry(pos, struct vx_info, vx_hlist);
-                       xids[nr_xids] = vxi->vx_id;                     
-                       if (++nr_xids >= size)
+                       xids[nr_xids] = vxi->vx_id;
+                       if (++nr_xids >= size) {
+                               spin_unlock(&vx_info_hash_lock);
                                goto out;
+                       }
                }
+               /* keep the lock time short */
+               spin_unlock(&vx_info_hash_lock);
        }
 out:
-       rcu_read_unlock();
        return nr_xids;
 }
 #endif
 
+
 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
 {
        struct user_struct *new_user, *old_user;
-       
+
        if (!p || !vxi)
                BUG();
        new_user = alloc_uid(vxi->vx_id, p->uid);
@@ -370,10 +493,8 @@ int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
        return 0;
 }
 
-void vx_mask_bcaps(struct task_struct *p)
+void vx_mask_bcaps(struct vx_info *vxi, struct task_struct *p)
 {
-       struct vx_info *vxi = p->vx_info;
-
        p->cap_effective &= vxi->vx_bcaps;
        p->cap_inheritable &= vxi->vx_bcaps;
        p->cap_permitted &= vxi->vx_bcaps;
@@ -382,37 +503,18 @@ void vx_mask_bcaps(struct task_struct *p)
 
 #include <linux/file.h>
 
-static inline int vx_nofiles_task(struct task_struct *tsk)
-{
-       struct files_struct *files = tsk->files;
-       const unsigned long *obptr, *cbptr;
-       int count, total;
-
-       spin_lock(&files->file_lock);
-       obptr = files->open_fds->fds_bits;
-       cbptr = files->close_on_exec->fds_bits;
-       count = files->max_fds / (sizeof(unsigned long) * 8);
-       for (total = 0; count > 0; count--) {
-               if (*obptr)
-                       total += hweight_long(*obptr);
-               obptr++;
-       /*      if (*cbptr)
-                       total += hweight_long(*cbptr);
-               cbptr++; */
-       }
-       spin_unlock(&files->file_lock);
-       return total;
-}
-
-static inline int vx_openfd_task(struct task_struct *tsk)
+static int vx_openfd_task(struct task_struct *tsk)
 {
        struct files_struct *files = tsk->files;
+       struct fdtable *fdt;
        const unsigned long *bptr;
        int count, total;
 
+       /* no rcu_read_lock() because of spin_lock() */
        spin_lock(&files->file_lock);
-       bptr = files->open_fds->fds_bits;
-       count = files->max_fds / (sizeof(unsigned long) * 8);
+       fdt = files_fdtable(files);
+       bptr = fdt->open_fds->fds_bits;
+       count = fdt->max_fds / (sizeof(unsigned long) * 8);
        for (total = 0; count > 0; count--) {
                if (*bptr)
                        total += hweight_long(*bptr);
@@ -431,7 +533,7 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
 {
        struct vx_info *old_vxi;
        int ret = 0;
-       
+
        if (!p || !vxi)
                BUG();
 
@@ -439,53 +541,141 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
        if (old_vxi == vxi)
                goto out;
 
-       vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
+       vxdprintk(VXD_CBIT(xid, 5),
+               "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
                vxi->vx_id, atomic_read(&vxi->vx_usecnt));
 
        if (!(ret = vx_migrate_user(p, vxi))) {
-               int openfd, nofiles;
+               int openfd;
 
                task_lock(p);
                openfd = vx_openfd_task(p);
-               nofiles = vx_nofiles_task(p);
 
                if (old_vxi) {
-                       atomic_dec(&old_vxi->cacct.nr_threads);
+                       atomic_dec(&old_vxi->cvirt.nr_threads);
+                       atomic_dec(&old_vxi->cvirt.nr_running);
                        atomic_dec(&old_vxi->limit.rcur[RLIMIT_NPROC]);
-                       atomic_sub(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
-                       atomic_sub(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
-               }               
-               atomic_inc(&vxi->cacct.nr_threads);
+                       /* FIXME: what about the struct files here? */
+                       atomic_sub(openfd, &old_vxi->limit.rcur[VLIMIT_OPENFD]);
+               }
+               atomic_inc(&vxi->cvirt.nr_threads);
+               atomic_inc(&vxi->cvirt.nr_running);
                atomic_inc(&vxi->limit.rcur[RLIMIT_NPROC]);
-               atomic_add(nofiles, &vxi->limit.rcur[RLIMIT_NOFILE]);
-               atomic_add(openfd, &vxi->limit.rcur[RLIMIT_OPENFD]);
-               /* should be handled in set_vx_info !! */
-               if (old_vxi)
+               /* FIXME: what about the struct files here? */
+               atomic_add(openfd, &vxi->limit.rcur[VLIMIT_OPENFD]);
+
+               if (old_vxi) {
+                       release_vx_info(old_vxi, p);
                        clr_vx_info(&p->vx_info);
+               }
+               claim_vx_info(vxi, p);
                set_vx_info(&p->vx_info, vxi);
                p->xid = vxi->vx_id;
-               vx_mask_bcaps(p);
-               task_unlock(p);
 
-               /* obsoleted by clr/set */
-               // put_vx_info(old_vxi);
+               vxdprintk(VXD_CBIT(xid, 5),
+                       "moved task %p into vxi:%p[#%d]",
+                       p, vxi, vxi->vx_id);
+
+               vx_mask_bcaps(vxi, p);
+               task_unlock(p);
        }
 out:
        put_vx_info(old_vxi);
        return ret;
 }
 
+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
+{
+       struct task_struct *old_reaper;
+
+       if (!vxi)
+               return -EINVAL;
+
+       vxdprintk(VXD_CBIT(xid, 6),
+               "vx_set_reaper(%p[#%d],%p[#%d,%d])",
+               vxi, vxi->vx_id, p, p->xid, p->pid);
+
+       old_reaper = vxi->vx_reaper;
+       if (old_reaper == p)
+               return 0;
+
+       /* set new child reaper */
+       get_task_struct(p);
+       vxi->vx_reaper = p;
+       put_task_struct(old_reaper);
+       return 0;
+}
+
 int vx_set_init(struct vx_info *vxi, struct task_struct *p)
 {
        if (!vxi)
                return -EINVAL;
-        if (vxi->vx_initpid)
-                return -EPERM;
 
-        vxi->vx_initpid = p->tgid;
+       vxdprintk(VXD_CBIT(xid, 6),
+               "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
+               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+
+       vxi->vx_flags &= ~VXF_STATE_INIT;
+       vxi->vx_initpid = p->tgid;
        return 0;
 }
 
+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
+{
+       vxdprintk(VXD_CBIT(xid, 6),
+               "vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
+               vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+
+       vxi->exit_code = code;
+       vxi->vx_initpid = 0;
+}
+
+void vx_set_persistent(struct vx_info *vxi)
+{
+       vxdprintk(VXD_CBIT(xid, 6),
+               "vx_set_persistent(%p[#%d])", vxi, vxi->vx_id);
+
+       get_vx_info(vxi);
+       claim_vx_info(vxi, current);
+}
+
+void vx_clear_persistent(struct vx_info *vxi)
+{
+       vxdprintk(VXD_CBIT(xid, 6),
+               "vx_clear_persistent(%p[#%d])", vxi, vxi->vx_id);
+
+       release_vx_info(vxi, current);
+       put_vx_info(vxi);
+}
+
+void vx_update_persistent(struct vx_info *vxi)
+{
+       if (vx_info_flags(vxi, VXF_PERSISTENT, 0))
+               vx_set_persistent(vxi);
+       else
+               vx_clear_persistent(vxi);
+}
+
+
+/*     task must be current or locked          */
+
+void   exit_vx_info(struct task_struct *p, int code)
+{
+       struct vx_info *vxi = p->vx_info;
+
+       if (vxi) {
+               atomic_dec(&vxi->cvirt.nr_threads);
+               vx_nproc_dec(p);
+
+               vxi->exit_code = code;
+               if (vxi->vx_initpid == p->tgid)
+                       vx_exit_init(vxi, p, code);
+               if (vxi->vx_reaper == p)
+                       vx_set_reaper(vxi, child_reaper);
+               release_vx_info(vxi, p);
+       }
+}
+
 
 /* vserver syscall commands below here */
 
@@ -496,22 +686,22 @@ int vx_set_init(struct vx_info *vxi, struct task_struct *p)
 
 int vc_task_xid(uint32_t id, void __user *data)
 {
-        xid_t xid;
+       xid_t xid;
 
-        if (id) {
-                struct task_struct *tsk;
+       if (id) {
+               struct task_struct *tsk;
 
-                if (!vx_check(0, VX_ADMIN|VX_WATCH))
-                        return -EPERM;
+               if (!vx_check(0, VX_ADMIN|VX_WATCH))
+                       return -EPERM;
 
-                read_lock(&tasklist_lock);
-                tsk = find_task_by_pid(id);
-                xid = (tsk) ? tsk->xid : -ESRCH;
-                read_unlock(&tasklist_lock);
-        }
-        else
-                xid = current->xid;
-        return xid;
+               read_lock(&tasklist_lock);
+               tsk = find_task_by_real_pid(id);
+               xid = (tsk) ? tsk->xid : -ESRCH;
+               read_unlock(&tasklist_lock);
+       }
+       else
+               xid = vx_current_xid();
+       return xid;
 }
 
 
@@ -525,7 +715,7 @@ int vc_vx_info(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -543,30 +733,47 @@ int vc_vx_info(uint32_t id, void __user *data)
 
 int vc_ctx_create(uint32_t xid, void __user *data)
 {
+       struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
        struct vx_info *new_vxi;
        int ret;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
+       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
 
-       if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
+       if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID))
                return -EINVAL;
-
-       if (xid < 1)
+       if (xid < 2)
                return -EINVAL;
 
-       new_vxi = __loc_vx_info(xid, &ret);
-       if (!new_vxi)
-               return ret;
-       if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
-               ret = -EEXIST;
-               goto out_put;
-       }
+       new_vxi = __create_vx_info(xid);
+       if (IS_ERR(new_vxi))
+               return PTR_ERR(new_vxi);
+
+       /* initial flags */
+       new_vxi->vx_flags = vc_data.flagword;
 
-       ret = new_vxi->vx_id;
-       vx_migrate_task(current, new_vxi);
-       /* if this fails, we might end up with a hashed vx_info */
-out_put:
+       /* get a reference for persistent contexts */
+       if ((vc_data.flagword & VXF_PERSISTENT))
+               vx_set_persistent(new_vxi);
+
+       ret = -ENOEXEC;
+       if (vs_state_change(new_vxi, VSC_STARTUP))
+               goto out_unhash;
+       ret = vx_migrate_task(current, new_vxi);
+       if (!ret) {
+               /* return context id on success */
+               ret = new_vxi->vx_id;
+               goto out;
+       }
+out_unhash:
+       /* prepare for context disposal */
+       new_vxi->vx_state |= VXS_SHUTDOWN;
+       if ((vc_data.flagword & VXF_PERSISTENT))
+               vx_clear_persistent(new_vxi);
+       __unhash_vx_info(new_vxi);
+out:
        put_vx_info(new_vxi);
        return ret;
 }
@@ -574,10 +781,13 @@ out_put:
 
 int vc_ctx_migrate(uint32_t id, void __user *data)
 {
+       struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
        struct vx_info *vxi;
-       
+
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
+       if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+               return -EFAULT;
 
        /* dirty hack until Spectator becomes a cap */
        if (id == 1) {
@@ -585,10 +795,14 @@ int vc_ctx_migrate(uint32_t id, void __user *data)
                return 0;
        }
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
        vx_migrate_task(current, vxi);
+       if (vc_data.flagword & VXM_SET_INIT)
+               vx_set_init(vxi, current);
+       if (vc_data.flagword & VXM_SET_REAPER)
+               vx_set_reaper(vxi, current);
        put_vx_info(vxi);
        return 0;
 }
@@ -602,7 +816,7 @@ int vc_get_cflags(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -629,7 +843,7 @@ int vc_set_cflags(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -637,14 +851,20 @@ int vc_set_cflags(uint32_t id, void __user *data)
        mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
        trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
 
-       if (trigger & VXF_STATE_SETUP)
-               vx_mask_bcaps(current);
-       if (trigger & VXF_STATE_INIT)
-               if (vxi == current->vx_info)
+       if (vxi == current->vx_info) {
+               if (trigger & VXF_STATE_SETUP)
+                       vx_mask_bcaps(vxi, current);
+               if (trigger & VXF_STATE_INIT) {
                        vx_set_init(vxi, current);
+                       vx_set_reaper(vxi, current);
+               }
+       }
 
        vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
                vc_data.flagword, mask);
+       if (trigger & VXF_PERSISTENT)
+               vx_update_persistent(vxi);
+
        put_vx_info(vxi);
        return 0;
 }
@@ -657,7 +877,7 @@ int vc_get_ccaps(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -681,7 +901,7 @@ int vc_set_ccaps(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = locate_vx_info(id);
+       vxi = lookup_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -694,7 +914,5 @@ int vc_set_ccaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-EXPORT_SYMBOL_GPL(rcu_free_vx_info);
-EXPORT_SYMBOL_GPL(vx_info_hash_lock);
-EXPORT_SYMBOL_GPL(unhash_vx_info);
+EXPORT_SYMBOL_GPL(free_vx_info);
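
For readers following the V0.08-V0.11 history in the file header: the net effect of this patch is to drop the RCU-based hash in favour of a plain spinlock-protected lookup that takes a reference while vx_info_hash_lock is held (see lookup_vx_info() above). Below is a minimal userspace sketch of that locking pattern, using a pthread mutex and simplified stand-in types; the names ctx and ctx_lookup_get are illustrative only, not the kernel's vx_info API.

#include <pthread.h>

#define CTX_HASH_SIZE 13

struct ctx {
	unsigned int id;
	int usecnt;			/* would be an atomic_t in the kernel */
	struct ctx *next;		/* hash chain link */
};

static struct ctx *ctx_hash[CTX_HASH_SIZE];
static pthread_mutex_t ctx_hash_lock = PTHREAD_MUTEX_INITIALIZER;

/* search the hash and take a reference under the lock,
 * mirroring lookup_vx_info()/__lookup_vx_info() in the patch */
static struct ctx *ctx_lookup_get(unsigned int id)
{
	struct ctx *c;

	pthread_mutex_lock(&ctx_hash_lock);
	for (c = ctx_hash[id % CTX_HASH_SIZE]; c; c = c->next)
		if (c->id == id) {
			c->usecnt++;	/* reference taken before the lock is dropped */
			break;
		}
	pthread_mutex_unlock(&ctx_hash_lock);
	return c;			/* NULL when the id is not hashed */
}

The point of the pattern is that the reference count is bumped while the hash lock is still held, so a concurrent unhash_vx_info()/free_vx_info() cannot free the object between lookup and get; once the caller is done it drops the reference, and only an unhashed context with no users and no tasks is ever passed to __dealloc_vx_info().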