*
* Virtual Server: Context Support
*
- * Copyright (C) 2003-2004 Herbert Pötzl
+ * Copyright (C) 2003-2007 Herbert Pötzl
*
* V0.01 context helper
* V0.02 vx_ctx_kill syscall command
* V0.06 task_xid and info commands
* V0.07 context flags and caps
* V0.08 switch to RCU based hash
+ * V0.09 revert to non RCU for now
+ * V0.10 and back to working RCU hash
+ * V0.11 and back to locking again
+ * V0.12 referenced context store
+ * V0.13 separate per cpu data
+ * V0.14 changed vcmds to vxi arg
+ * V0.15 added context stat
+ * V0.16 have __create claim() the vxi
*
*/
-#include <linux/config.h>
#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/mnt_namespace.h>
+#include <linux/pid_namespace.h>
+
+#include <linux/sched.h>
#include <linux/vserver/context.h>
+#include <linux/vserver/network.h>
#include <linux/vserver/legacy.h>
-#include <linux/vinline.h>
-#include <linux/kernel_stat.h>
-#include <linux/namespace.h>
-#include <linux/rcupdate.h>
+#include <linux/vserver/debug.h>
+#include <linux/vserver/limit.h>
+#include <linux/vserver/limit_int.h>
+#include <linux/vserver/space.h>
+
+#include <linux/vs_context.h>
+#include <linux/vs_limit.h>
+#include <linux/vserver/context_cmd.h>
+#include <linux/err.h>
#include <asm/errno.h>
+#include "cvirt_init.h"
+#include "cacct_init.h"
+#include "limit_init.h"
+#include "sched_init.h"
+
+
+atomic_t vx_global_ctotal = ATOMIC_INIT(0);
+atomic_t vx_global_cactive = ATOMIC_INIT(0);
+
+
+/* now inactive context structures */
+
+static struct hlist_head vx_info_inactive = HLIST_HEAD_INIT;
+
+static spinlock_t vx_info_inactive_lock = SPIN_LOCK_UNLOCKED;
+
/* __alloc_vx_info()
static struct vx_info *__alloc_vx_info(xid_t xid)
{
struct vx_info *new = NULL;
-
- vxdprintk("alloc_vx_info(%d)\n", xid);
+ int cpu;
+
+ vxdprintk(VXD_CBIT(xid, 0), "alloc_vx_info(%d)*", xid);
/* would this benefit from a slab cache? */
new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
return 0;
memset (new, 0, sizeof(struct vx_info));
+#ifdef CONFIG_SMP
+ new->ptr_pc = alloc_percpu(struct _vx_info_pc);
+ if (!new->ptr_pc)
+ goto error;
+#endif
new->vx_id = xid;
- INIT_RCU_HEAD(&new->vx_rcu);
INIT_HLIST_NODE(&new->vx_hlist);
- atomic_set(&new->vx_refcnt, 0);
atomic_set(&new->vx_usecnt, 0);
+ atomic_set(&new->vx_tasks, 0);
+ new->vx_parent = NULL;
+ new->vx_state = 0;
+ init_waitqueue_head(&new->vx_wait);
+
+ /* prepare reaper */
+ get_task_struct(init_pid_ns.child_reaper);
+ new->vx_reaper = init_pid_ns.child_reaper;
/* rest of init goes here */
vx_info_init_limit(&new->limit);
vx_info_init_cvirt(&new->cvirt);
vx_info_init_cacct(&new->cacct);
- new->vx_flags = VXF_STATE_SETUP|VXF_STATE_INIT;
+ /* per cpu data structures */
+ for_each_possible_cpu(cpu) {
+ vx_info_init_sched_pc(
+ &vx_per_cpu(new, sched_pc, cpu), cpu);
+ vx_info_init_cvirt_pc(
+ &vx_per_cpu(new, cvirt_pc, cpu), cpu);
+ }
+
+ new->vx_flags = VXF_INIT_SET;
new->vx_bcaps = CAP_INIT_EFF_SET;
new->vx_ccaps = 0;
+ new->vx_cap_bset = cap_bset;
+
+ new->reboot_cmd = 0;
+ new->exit_code = 0;
- vxdprintk("alloc_vx_info(%d) = %p\n", xid, new);
+ vxdprintk(VXD_CBIT(xid, 0),
+ "alloc_vx_info(%d) = %p", xid, new);
+ vxh_alloc_vx_info(new);
+ atomic_inc(&vx_global_ctotal);
return new;
+#ifdef CONFIG_SMP
+error:
+ kfree(new);
+ return 0;
+#endif
}
/* __dealloc_vx_info()
static void __dealloc_vx_info(struct vx_info *vxi)
{
- vxdprintk("dealloc_vx_info(%p)\n", vxi);
+ int cpu;
+
+ vxdprintk(VXD_CBIT(xid, 0),
+ "dealloc_vx_info(%p)", vxi);
+ vxh_dealloc_vx_info(vxi);
- vxi->vx_hlist.next = LIST_POISON1;
vxi->vx_id = -1;
- if (vxi->vx_namespace)
- put_namespace(vxi->vx_namespace);
- if (vxi->vx_fs)
- put_fs_struct(vxi->vx_fs);
-
vx_info_exit_limit(&vxi->limit);
vx_info_exit_sched(&vxi->sched);
vx_info_exit_cvirt(&vxi->cvirt);
vx_info_exit_cacct(&vxi->cacct);
-
- BUG_ON(atomic_read(&vxi->vx_usecnt));
- BUG_ON(atomic_read(&vxi->vx_refcnt));
+ for_each_possible_cpu(cpu) {
+ vx_info_exit_sched_pc(
+ &vx_per_cpu(vxi, sched_pc, cpu), cpu);
+ vx_info_exit_cvirt_pc(
+ &vx_per_cpu(vxi, cvirt_pc, cpu), cpu);
+ }
+
+ vxi->vx_state |= VXS_RELEASED;
+
+#ifdef CONFIG_SMP
+ free_percpu(vxi->ptr_pc);
+#endif
kfree(vxi);
+ atomic_dec(&vx_global_ctotal);
+}
+
+/*	__shutdown_vx_info()
+
+	* mark the context as shut down and report the state change
+	* detach the nsproxy and fs_struct held by the context
+	  and drop them; may sleep				*/
+
+static void __shutdown_vx_info(struct vx_info *vxi)
+{
+	struct nsproxy *old_nsproxy;
+	struct fs_struct *old_fs;
+
+	might_sleep();
+
+	vxi->vx_state |= VXS_SHUTDOWN;
+	vs_state_change(vxi, VSC_SHUTDOWN);
+
+	/* take both pointers out of the context first, so the
+	   context no longer refers to them when they are put */
+	old_nsproxy = xchg(&vxi->vx_nsproxy, NULL);
+	old_fs = xchg(&vxi->vx_fs, NULL);
+
+	if (old_nsproxy)
+		put_nsproxy(old_nsproxy);
+	if (old_fs)
+		put_fs_struct(old_fs);
+}
+
+/* exported stuff */
+
+/*	free_vx_info()
+
+	* final release of a context: assert that it is shut down,
+	  unused and unhashed, unlink it under the inactive lock
+	  and hand it to __dealloc_vx_info()			*/
+
+void free_vx_info(struct vx_info *vxi)
+{
+	unsigned long irqflags;
+
+	/* a context has to pass through shutdown first */
+	BUG_ON(!vx_info_state(vxi, VXS_SHUTDOWN));
+
+	/* neither references nor tasks may be left */
+	BUG_ON(atomic_read(&vxi->vx_usecnt));
+	BUG_ON(atomic_read(&vxi->vx_tasks));
+
+	/* it must have been unhashed already */
+	BUG_ON(vx_info_state(vxi, VXS_HASHED));
+
+	/* nsproxy and fs must have been detached by now */
+	BUG_ON(vxi->vx_nsproxy);
+	BUG_ON(vxi->vx_fs);
+
+	spin_lock_irqsave(&vx_info_inactive_lock, irqflags);
+	hlist_del(&vxi->vx_hlist);
+	spin_unlock_irqrestore(&vx_info_inactive_lock, irqflags);
+
+	__dealloc_vx_info(vxi);
}
/* hash table for vx_info hash */
-#define VX_HASH_SIZE 13
+#define VX_HASH_SIZE 13
-struct hlist_head vx_info_hash[VX_HASH_SIZE];
+static struct hlist_head vx_info_hash[VX_HASH_SIZE] =
+ { [0 ... VX_HASH_SIZE-1] = HLIST_HEAD_INIT };
static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
static inline void __hash_vx_info(struct vx_info *vxi)
{
struct hlist_head *head;
-
- vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
- get_vx_info(vxi);
+
+ vxd_assert_lock(&vx_info_hash_lock);
+ vxdprintk(VXD_CBIT(xid, 4),
+ "__hash_vx_info: %p[#%d]", vxi, vxi->vx_id);
+ vxh_hash_vx_info(vxi);
+
+ /* context must not be hashed */
+ BUG_ON(vx_info_state(vxi, VXS_HASHED));
+
+ vxi->vx_state |= VXS_HASHED;
head = &vx_info_hash[__hashval(vxi->vx_id)];
- hlist_add_head_rcu(&vxi->vx_hlist, head);
+ hlist_add_head(&vxi->vx_hlist, head);
+ atomic_inc(&vx_global_cactive);
}
/* __unhash_vx_info()
static inline void __unhash_vx_info(struct vx_info *vxi)
{
- vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
- hlist_del_rcu(&vxi->vx_hlist);
- put_vx_info(vxi);
+ unsigned long flags;
+
+ vxd_assert_lock(&vx_info_hash_lock);
+ vxdprintk(VXD_CBIT(xid, 4),
+ "__unhash_vx_info: %p[#%d.%d.%d]", vxi, vxi->vx_id,
+ atomic_read(&vxi->vx_usecnt), atomic_read(&vxi->vx_tasks));
+ vxh_unhash_vx_info(vxi);
+
+ /* context must be hashed */
+ BUG_ON(!vx_info_state(vxi, VXS_HASHED));
+ /* but without tasks */
+ BUG_ON(atomic_read(&vxi->vx_tasks));
+
+ vxi->vx_state &= ~VXS_HASHED;
+ hlist_del_init(&vxi->vx_hlist);
+ spin_lock_irqsave(&vx_info_inactive_lock, flags);
+ hlist_add_head(&vxi->vx_hlist, &vx_info_inactive);
+ spin_unlock_irqrestore(&vx_info_inactive_lock, flags);
+ atomic_dec(&vx_global_cactive);
}
/* __lookup_vx_info()
- * requires the rcu_read_lock()
+ * requires the hash_lock to be held
* doesn't increment the vx_refcnt */
static inline struct vx_info *__lookup_vx_info(xid_t xid)
{
struct hlist_head *head = &vx_info_hash[__hashval(xid)];
struct hlist_node *pos;
+ struct vx_info *vxi;
+ vxd_assert_lock(&vx_info_hash_lock);
hlist_for_each(pos, head) {
- struct vx_info *vxi =
- hlist_entry(pos, struct vx_info, vx_hlist);
+ vxi = hlist_entry(pos, struct vx_info, vx_hlist);
- if (vxi->vx_id == xid) {
- return vxi;
- }
+ if (vxi->vx_id == xid)
+ goto found;
}
- return NULL;
+ vxi = NULL;
+found:
+ vxdprintk(VXD_CBIT(xid, 0),
+ "__lookup_vx_info(#%u): %p[#%u]",
+ xid, vxi, vxi?vxi->vx_id:0);
+ vxh_lookup_vx_info(vxi, xid);
+ return vxi;
}
{
static xid_t seq = MAX_S_CONTEXT;
xid_t barrier = seq;
-
+
+ vxd_assert_lock(&vx_info_hash_lock);
do {
if (++seq > MAX_S_CONTEXT)
seq = MIN_D_CONTEXT;
- if (!__lookup_vx_info(seq))
+ if (!__lookup_vx_info(seq)) {
+ vxdprintk(VXD_CBIT(xid, 4),
+ "__vx_dynamic_id: [#%d]", seq);
return seq;
+ }
} while (barrier != seq);
return 0;
}
+#ifdef CONFIG_VSERVER_LEGACY
+
/* __loc_vx_info()
* locate or create the requested context
static struct vx_info * __loc_vx_info(int id, int *err)
{
struct vx_info *new, *vxi = NULL;
-
- vxdprintk("loc_vx_info(%d)\n", id);
+
+ vxdprintk(VXD_CBIT(xid, 1), "loc_vx_info(%d)*", id);
if (!(new = __alloc_vx_info(id))) {
*err = -ENOMEM;
return NULL;
}
+ /* required to make dynamic xids unique */
spin_lock(&vx_info_hash_lock);
/* dynamic context requested */
if (id == VX_DYNAMIC_ID) {
+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
id = __vx_dynamic_id();
if (!id) {
printk(KERN_ERR "no dynamic context available.\n");
goto out_unlock;
}
new->vx_id = id;
+#else
+ printk(KERN_ERR "dynamic contexts disabled.\n");
+ goto out_unlock;
+#endif
}
/* existing context requested */
else if ((vxi = __lookup_vx_info(id))) {
/* context in setup is not available */
if (vxi->vx_flags & VXF_STATE_SETUP) {
- vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi);
+ vxdprintk(VXD_CBIT(xid, 0),
+ "loc_vx_info(%d) = %p (not available)", id, vxi);
vxi = NULL;
*err = -EBUSY;
} else {
- vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi);
+ vxdprintk(VXD_CBIT(xid, 0),
+ "loc_vx_info(%d) = %p (found)", id, vxi);
get_vx_info(vxi);
*err = 0;
}
}
/* new context requested */
- vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new);
+ vxdprintk(VXD_CBIT(xid, 0),
+ "loc_vx_info(%d) = %p (new)", id, new);
__hash_vx_info(get_vx_info(new));
vxi = new, new = NULL;
*err = 1;
out_unlock:
spin_unlock(&vx_info_hash_lock);
+ vxh_loc_vx_info(vxi, id);
if (new)
__dealloc_vx_info(new);
return vxi;
}
+#endif
+/* __create_vx_info()
-/* exported stuff */
+ * create the requested context
+ * get(), claim() and hash it */
+static struct vx_info * __create_vx_info(int id)
+{
+ struct vx_info *new, *vxi = NULL;
+ vxdprintk(VXD_CBIT(xid, 1), "create_vx_info(%d)*", id);
-void rcu_free_vx_info(void *obj)
-{
- struct vx_info *vxi = obj;
- int usecnt, refcnt;
+ if (!(new = __alloc_vx_info(id)))
+ return ERR_PTR(-ENOMEM);
- usecnt = atomic_read(&vxi->vx_usecnt);
- BUG_ON(usecnt < 0);
+ /* required to make dynamic xids unique */
+ spin_lock(&vx_info_hash_lock);
+
+ /* dynamic context requested */
+ if (id == VX_DYNAMIC_ID) {
+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
+ id = __vx_dynamic_id();
+ if (!id) {
+ printk(KERN_ERR "no dynamic context available.\n");
+ vxi = ERR_PTR(-EAGAIN);
+ goto out_unlock;
+ }
+ new->vx_id = id;
+#else
+ printk(KERN_ERR "dynamic contexts disabled.\n");
+ vxi = ERR_PTR(-EINVAL);
+ goto out_unlock;
+#endif
+ }
+ /* static context requested */
+ else if ((vxi = __lookup_vx_info(id))) {
+ vxdprintk(VXD_CBIT(xid, 0),
+ "create_vx_info(%d) = %p (already there)", id, vxi);
+ if (vx_info_flags(vxi, VXF_STATE_SETUP, 0))
+ vxi = ERR_PTR(-EBUSY);
+ else
+ vxi = ERR_PTR(-EEXIST);
+ goto out_unlock;
+ }
+#ifdef CONFIG_VSERVER_DYNAMIC_IDS
+ /* dynamic xid creation blocker */
+ else if (id >= MIN_D_CONTEXT) {
+ vxdprintk(VXD_CBIT(xid, 0),
+ "create_vx_info(%d) (dynamic rejected)", id);
+ vxi = ERR_PTR(-EINVAL);
+ goto out_unlock;
+ }
+#endif
- refcnt = atomic_read(&vxi->vx_refcnt);
- BUG_ON(refcnt < 0);
+ /* new context */
+ vxdprintk(VXD_CBIT(xid, 0),
+ "create_vx_info(%d) = %p (new)", id, new);
+ claim_vx_info(new, NULL);
+ __hash_vx_info(get_vx_info(new));
+ vxi = new, new = NULL;
- if (!usecnt)
- __dealloc_vx_info(vxi);
- else
- printk("!!! rcu didn't free\n");
+out_unlock:
+ spin_unlock(&vx_info_hash_lock);
+ vxh_create_vx_info(IS_ERR(vxi)?NULL:vxi, id);
+ if (new)
+ __dealloc_vx_info(new);
+ return vxi;
}
+
+/* exported stuff */
+
+
void unhash_vx_info(struct vx_info *vxi)
{
+ __shutdown_vx_info(vxi);
spin_lock(&vx_info_hash_lock);
__unhash_vx_info(vxi);
spin_unlock(&vx_info_hash_lock);
+ __wakeup_vx_info(vxi);
}
-/* locate_vx_info()
- * search for a vx_info and get() it
+/* lookup_vx_info()
+
+ * search for a vx_info and get() it
* negative id means current */
-struct vx_info *locate_vx_info(int id)
+struct vx_info *lookup_vx_info(int id)
{
- struct vx_info *vxi;
-
+ struct vx_info *vxi = NULL;
+
if (id < 0) {
vxi = get_vx_info(current->vx_info);
- } else {
- rcu_read_lock();
+ } else if (id > 1) {
+ spin_lock(&vx_info_hash_lock);
vxi = get_vx_info(__lookup_vx_info(id));
- rcu_read_unlock();
+ spin_unlock(&vx_info_hash_lock);
}
return vxi;
}
-/* vx_info_is_hashed()
+/* xid_is_hashed()
* verify that xid is still hashed */
-int vx_info_is_hashed(xid_t xid)
+int xid_is_hashed(xid_t xid)
{
int hashed;
- rcu_read_lock();
+ spin_lock(&vx_info_hash_lock);
hashed = (__lookup_vx_info(xid) != NULL);
- rcu_read_unlock();
+ spin_unlock(&vx_info_hash_lock);
return hashed;
}
#ifdef CONFIG_VSERVER_LEGACY
-#if 0
-struct vx_info *alloc_vx_info(xid_t xid)
-{
- return __alloc_vx_info(xid);
-}
-#endif
-
-struct vx_info *locate_or_create_vx_info(int id)
+struct vx_info *lookup_or_create_vx_info(int id)
{
int err;
#ifdef CONFIG_PROC_FS
-#define hlist_for_each_rcu(pos, head) \
- for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
- pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
+/* get_xid_list()
+
+ * get a subset of hashed xids for proc
+ * assumes size is at least one */
int get_xid_list(int index, unsigned int *xids, int size)
{
int hindex, nr_xids = 0;
- rcu_read_lock();
+ /* only show current and children */
+ if (!vx_check(0, VS_ADMIN|VS_WATCH)) {
+ if (index > 0)
+ return 0;
+ xids[nr_xids] = vx_current_xid();
+ return 1;
+ }
+
for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
struct hlist_head *head = &vx_info_hash[hindex];
struct hlist_node *pos;
- hlist_for_each_rcu(pos, head) {
+ spin_lock(&vx_info_hash_lock);
+ hlist_for_each(pos, head) {
struct vx_info *vxi;
if (--index > 0)
continue;
vxi = hlist_entry(pos, struct vx_info, vx_hlist);
- xids[nr_xids] = vxi->vx_id;
- if (++nr_xids >= size)
+ xids[nr_xids] = vxi->vx_id;
+ if (++nr_xids >= size) {
+ spin_unlock(&vx_info_hash_lock);
goto out;
+ }
}
+ /* keep the lock time short */
+ spin_unlock(&vx_info_hash_lock);
}
out:
- rcu_read_unlock();
return nr_xids;
}
#endif
+#ifdef CONFIG_VSERVER_DEBUG
+
+/*	dump_vx_info_inactive()
+
+	* debug helper: call dump_vx_info() with the given level
+	  for every entry on the vx_info_inactive list	*/
+
+void dump_vx_info_inactive(int level)
+{
+	struct hlist_node *entry, *next;
+
+	/* NOTE(review): the list is walked without taking
+	   vx_info_inactive_lock -- confirm that callers can
+	   tolerate concurrent changes to the list */
+	hlist_for_each_safe(entry, next, &vx_info_inactive) {
+		/* vx_hlist is an hlist node, so use the hlist accessor
+		   like the other hash walks in this file do */
+		struct vx_info *vxi =
+			hlist_entry(entry, struct vx_info, vx_hlist);
+
+		dump_vx_info(vxi, level);
+	}
+}
+
+#endif
+
int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
{
struct user_struct *new_user, *old_user;
-
+
if (!p || !vxi)
BUG();
+
+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0))
+ return -EACCES;
+
new_user = alloc_uid(vxi->vx_id, p->uid);
if (!new_user)
return -ENOMEM;
return 0;
}
-void vx_mask_bcaps(struct task_struct *p)
+void vx_mask_cap_bset(struct vx_info *vxi, struct task_struct *p)
{
- struct vx_info *vxi = p->vx_info;
-
- p->cap_effective &= vxi->vx_bcaps;
- p->cap_inheritable &= vxi->vx_bcaps;
- p->cap_permitted &= vxi->vx_bcaps;
+ p->cap_effective &= vxi->vx_cap_bset;
+ p->cap_inheritable &= vxi->vx_cap_bset;
+ p->cap_permitted &= vxi->vx_cap_bset;
}
#include <linux/file.h>
-static inline int vx_nofiles_task(struct task_struct *tsk)
-{
- struct files_struct *files = tsk->files;
- const unsigned long *obptr, *cbptr;
- int count, total;
-
- spin_lock(&files->file_lock);
- obptr = files->open_fds->fds_bits;
- cbptr = files->close_on_exec->fds_bits;
- count = files->max_fds / (sizeof(unsigned long) * 8);
- for (total = 0; count > 0; count--) {
- if (*obptr)
- total += hweight_long(*obptr);
- obptr++;
- /* if (*cbptr)
- total += hweight_long(*cbptr);
- cbptr++; */
- }
- spin_unlock(&files->file_lock);
- return total;
-}
-
-static inline int vx_openfd_task(struct task_struct *tsk)
+static int vx_openfd_task(struct task_struct *tsk)
{
struct files_struct *files = tsk->files;
+ struct fdtable *fdt;
const unsigned long *bptr;
int count, total;
+ /* no rcu_read_lock() because of spin_lock() */
spin_lock(&files->file_lock);
- bptr = files->open_fds->fds_bits;
- count = files->max_fds / (sizeof(unsigned long) * 8);
+ fdt = files_fdtable(files);
+ bptr = fdt->open_fds->fds_bits;
+ count = fdt->max_fds / (sizeof(unsigned long) * 8);
for (total = 0; count > 0; count--) {
if (*bptr)
total += hweight_long(*bptr);
return total;
}
+
+/* for *space compatibility */
+
+asmlinkage long sys_unshare(unsigned long);
+
/*
* migrate task to new context
* gets vxi, puts old_vxi on change
+ * optionally unshares namespaces (hack)
*/
-int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
+int vx_migrate_task(struct task_struct *p, struct vx_info *vxi, int unshare)
{
struct vx_info *old_vxi;
int ret = 0;
-
+
if (!p || !vxi)
BUG();
+ vxdprintk(VXD_CBIT(xid, 5),
+ "vx_migrate_task(%p,%p[#%d.%d])", p, vxi,
+ vxi->vx_id, atomic_read(&vxi->vx_usecnt));
+
+ if (vx_info_flags(vxi, VXF_INFO_PRIVATE, 0) &&
+ !vx_info_flags(vxi, VXF_STATE_SETUP, 0))
+ return -EACCES;
+
+ if (vx_info_state(vxi, VXS_SHUTDOWN))
+ return -EFAULT;
+
old_vxi = task_get_vx_info(p);
if (old_vxi == vxi)
goto out;
- vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
- vxi->vx_id, atomic_read(&vxi->vx_usecnt));
-
if (!(ret = vx_migrate_user(p, vxi))) {
+ int openfd;
+
task_lock(p);
+ openfd = vx_openfd_task(p);
+
if (old_vxi) {
- atomic_dec(&old_vxi->cacct.nr_threads);
- atomic_dec(&old_vxi->limit.res[RLIMIT_NPROC]);
- }
- atomic_inc(&vxi->cacct.nr_threads);
- atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
- atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]);
- atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]);
- /* should be handled in set_vx_info !! */
- if (old_vxi)
+ atomic_dec(&old_vxi->cvirt.nr_threads);
+ atomic_dec(&old_vxi->cvirt.nr_running);
+ __rlim_dec(&old_vxi->limit, RLIMIT_NPROC);
+ /* FIXME: what about the struct files here? */
+ __rlim_sub(&old_vxi->limit, VLIMIT_OPENFD, openfd);
+ /* account for the executable */
+ __rlim_dec(&old_vxi->limit, VLIMIT_DENTRY);
+ }
+ atomic_inc(&vxi->cvirt.nr_threads);
+ atomic_inc(&vxi->cvirt.nr_running);
+ __rlim_inc(&vxi->limit, RLIMIT_NPROC);
+ /* FIXME: what about the struct files here? */
+ __rlim_add(&vxi->limit, VLIMIT_OPENFD, openfd);
+ /* account for the executable */
+ __rlim_inc(&vxi->limit, VLIMIT_DENTRY);
+
+ if (old_vxi) {
+ release_vx_info(old_vxi, p);
clr_vx_info(&p->vx_info);
+ }
+ claim_vx_info(vxi, p);
set_vx_info(&p->vx_info, vxi);
p->xid = vxi->vx_id;
- vx_mask_bcaps(p);
+
+ vxdprintk(VXD_CBIT(xid, 5),
+ "moved task %p into vxi:%p[#%d]",
+ p, vxi, vxi->vx_id);
+
+ vx_mask_cap_bset(vxi, p);
task_unlock(p);
- put_vx_info(old_vxi);
+ /* hack for *spaces to provide compatibility */
+ if (unshare) {
+ ret = sys_unshare(CLONE_NEWUTS|CLONE_NEWIPC);
+ vx_set_space(vxi, CLONE_NEWUTS|CLONE_NEWIPC);
+ }
}
out:
put_vx_info(old_vxi);
return ret;
}
+/*	vx_set_reaper()
+
+	* make task p the child reaper of context vxi
+	* takes a task reference on p and drops the one held
+	  on the previous reaper; nothing happens if p already
+	  is the reaper
+	* returns -EINVAL without a context, 0 otherwise	*/
+
+int vx_set_reaper(struct vx_info *vxi, struct task_struct *p)
+{
+	struct task_struct *prev;
+
+	if (!vxi)
+		return -EINVAL;
+
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_set_reaper(%p[#%d],%p[#%d,%d])",
+		vxi, vxi->vx_id, p, p->xid, p->pid);
+
+	prev = vxi->vx_reaper;
+	if (prev != p) {
+		/* reference the new reaper before letting go
+		   of the previous one */
+		get_task_struct(p);
+		vxi->vx_reaper = p;
+		put_task_struct(prev);
+	}
+	return 0;
+}
+
int vx_set_init(struct vx_info *vxi, struct task_struct *p)
{
if (!vxi)
return -EINVAL;
- if (vxi->vx_initpid)
- return -EPERM;
- vxi->vx_initpid = p->tgid;
+ vxdprintk(VXD_CBIT(xid, 6),
+ "vx_set_init(%p[#%d],%p[#%d,%d,%d])",
+ vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+
+ vxi->vx_flags &= ~VXF_STATE_INIT;
+ vxi->vx_initpid = p->tgid;
return 0;
}
+/*	vx_exit_init()
+
+	* record the exit code in the context and clear its
+	  init pid; p is only used for the debug message	*/
+
+void vx_exit_init(struct vx_info *vxi, struct task_struct *p, int code)
+{
+	vxdprintk(VXD_CBIT(xid, 6),
+		"vx_exit_init(%p[#%d],%p[#%d,%d,%d])",
+		vxi, vxi->vx_id, p, p->xid, p->pid, p->tgid);
+
+	vxi->vx_initpid = 0;
+	vxi->exit_code = code;
+}
+
+
+/*	vx_set_persistent()
+
+	* get() and claim() the context with a NULL task;
+	  vx_clear_persistent() is the matching undo		*/
+
+void vx_set_persistent(struct vx_info *vxi)
+{
+	vxdprintk(VXD_CBIT(xid, 6), "vx_set_persistent(%p[#%d])",
+		vxi, vxi->vx_id);
+
+	get_vx_info(vxi);
+	claim_vx_info(vxi, NULL);
+}
+
+/*	vx_clear_persistent()
+
+	* release() and put() the context with a NULL task,
+	  in the reverse order of vx_set_persistent()		*/
+
+void vx_clear_persistent(struct vx_info *vxi)
+{
+	vxdprintk(VXD_CBIT(xid, 6), "vx_clear_persistent(%p[#%d])",
+		vxi, vxi->vx_id);
+
+	release_vx_info(vxi, NULL);
+	put_vx_info(vxi);
+}
+
+/*	vx_update_persistent()
+
+	* set or clear persistence of the context depending on
+	  the result of the VXF_PERSISTENT flag check		*/
+
+void vx_update_persistent(struct vx_info *vxi)
+{
+	if (!vx_info_flags(vxi, VXF_PERSISTENT, 0)) {
+		vx_clear_persistent(vxi);
+		return;
+	}
+	vx_set_persistent(vxi);
+}
+
+
+/* task must be current or locked */
+
+/*	exit_vx_info()
+
+	* exit time accounting for task p: decrement the thread
+	  count, call vx_nproc_dec(), store the exit code and
+	  release() the context for p
+	* does nothing for a task without a context		*/
+
+void exit_vx_info(struct task_struct *p, int code)
+{
+	struct vx_info *vxi = p->vx_info;
+
+	if (!vxi)
+		return;
+
+	atomic_dec(&vxi->cvirt.nr_threads);
+	vx_nproc_dec(p);
+
+	vxi->exit_code = code;
+	release_vx_info(vxi, p);
+}
+
+/*	exit_vx_info_early()
+
+	* if the tgid of p matches the context's init pid,
+	  do the init exit handling
+	* if p is the context's reaper, fall back to the
+	  child reaper of init_pid_ns
+	* does nothing for a task without a context		*/
+
+void exit_vx_info_early(struct task_struct *p, int code)
+{
+	struct vx_info *vxi = p->vx_info;
+
+	if (!vxi)
+		return;
+
+	if (vxi->vx_initpid == p->tgid)
+		vx_exit_init(vxi, p, code);
+	if (vxi->vx_reaper == p)
+		vx_set_reaper(vxi, init_pid_ns.child_reaper);
+}
+
/* vserver syscall commands below here */
int vc_task_xid(uint32_t id, void __user *data)
{
- xid_t xid;
+ xid_t xid;
- if (id) {
- struct task_struct *tsk;
+ if (id) {
+ struct task_struct *tsk;
- if (!vx_check(0, VX_ADMIN|VX_WATCH))
- return -EPERM;
+ if (!vx_check(0, VS_ADMIN|VS_WATCH))
+ return -EPERM;
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(id);
- xid = (tsk) ? tsk->xid : -ESRCH;
- read_unlock(&tasklist_lock);
- }
- else
- xid = current->xid;
- return xid;
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_real_pid(id);
+ xid = (tsk) ? tsk->xid : -ESRCH;
+ read_unlock(&tasklist_lock);
+ }
+ else
+ xid = vx_current_xid();
+ return xid;
}
-int vc_vx_info(uint32_t id, void __user *data)
+int vc_vx_info(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
struct vcmd_vx_info_v0 vc_data;
- if (!vx_check(0, VX_ADMIN))
- return -ENOSYS;
- if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
- return -EPERM;
-
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
-
vc_data.xid = vxi->vx_id;
vc_data.initpid = vxi->vx_initpid;
- put_vx_info(vxi);
+
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
+
+
+int vc_ctx_stat(struct vx_info *vxi, void __user *data)
+{
+ struct vcmd_ctx_stat_v0 vc_data;
+
+ vc_data.usecnt = atomic_read(&vxi->vx_usecnt);
+ vc_data.tasks = atomic_read(&vxi->vx_tasks);
if (copy_to_user (data, &vc_data, sizeof(vc_data)))
return -EFAULT;
int vc_ctx_create(uint32_t xid, void __user *data)
{
+ struct vcmd_ctx_create vc_data = { .flagword = VXF_INIT_SET };
struct vx_info *new_vxi;
int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
- if ((xid >= MIN_D_CONTEXT) && (xid != VX_DYNAMIC_ID))
+ if ((xid > MAX_S_CONTEXT) && (xid != VX_DYNAMIC_ID))
return -EINVAL;
-
- if (xid < 1)
+ if (xid < 2)
return -EINVAL;
- new_vxi = __loc_vx_info(xid, &ret);
- if (!new_vxi)
- return ret;
- if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
- ret = -EEXIST;
- goto out_put;
- }
+ new_vxi = __create_vx_info(xid);
+ if (IS_ERR(new_vxi))
+ return PTR_ERR(new_vxi);
+
+ /* initial flags */
+ new_vxi->vx_flags = vc_data.flagword;
+ ret = -ENOEXEC;
+ if (vs_state_change(new_vxi, VSC_STARTUP))
+ goto out;
+
+ ret = vx_migrate_task(current, new_vxi, (!data));
+ if (ret)
+ goto out;
+
+ /* return context id on success */
ret = new_vxi->vx_id;
- vx_migrate_task(current, new_vxi);
- /* if this fails, we might end up with a hashed vx_info */
-out_put:
+
+ /* get a reference for persistent contexts */
+ if ((vc_data.flagword & VXF_PERSISTENT))
+ vx_set_persistent(new_vxi);
+out:
+ release_vx_info(new_vxi, NULL);
put_vx_info(new_vxi);
return ret;
}
-int vc_ctx_migrate(uint32_t id, void __user *data)
+int vc_ctx_migrate(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ struct vcmd_ctx_migrate vc_data = { .flagword = 0 };
+ int ret;
- /* dirty hack until Spectator becomes a cap */
- if (id == 1) {
- current->xid = 1;
- return 0;
- }
+ if (data && copy_from_user (&vc_data, data, sizeof(vc_data)))
+ return -EFAULT;
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
- vx_migrate_task(current, vxi);
- put_vx_info(vxi);
- return 0;
+ ret = vx_migrate_task(current, vxi, 0);
+ if (ret)
+ return ret;
+ if (vc_data.flagword & VXM_SET_INIT)
+ ret = vx_set_init(vxi, current);
+ if (ret)
+ return ret;
+ if (vc_data.flagword & VXM_SET_REAPER)
+ ret = vx_set_reaper(vxi, current);
+ return ret;
}
-int vc_get_cflags(uint32_t id, void __user *data)
+int vc_get_cflags(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
struct vcmd_ctx_flags_v0 vc_data;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
-
vc_data.flagword = vxi->vx_flags;
/* special STATE flag handling */
- vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
-
- put_vx_info(vxi);
+ vc_data.mask = vs_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
if (copy_to_user (data, &vc_data, sizeof(vc_data)))
return -EFAULT;
return 0;
}
-int vc_set_cflags(uint32_t id, void __user *data)
+int vc_set_cflags(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
struct vcmd_ctx_flags_v0 vc_data;
uint64_t mask, trigger;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
-
/* special STATE flag handling */
- mask = vx_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
+ mask = vs_mask_mask(vc_data.mask, vxi->vx_flags, VXF_ONE_TIME);
trigger = (mask & vxi->vx_flags) ^ (mask & vc_data.flagword);
- if (trigger & VXF_STATE_SETUP)
- vx_mask_bcaps(current);
- if (trigger & VXF_STATE_INIT)
- if (vxi == current->vx_info)
- vx_set_init(vxi, current);
+ if (vxi == current->vx_info) {
+ if (trigger & VXF_STATE_SETUP)
+ vx_mask_cap_bset(vxi, current);
+ if (trigger & VXF_STATE_INIT) {
+ int ret;
+
+ ret = vx_set_init(vxi, current);
+ if (ret)
+ return ret;
+ ret = vx_set_reaper(vxi, current);
+ if (ret)
+ return ret;
+ }
+ }
- vxi->vx_flags = vx_mask_flags(vxi->vx_flags,
+ vxi->vx_flags = vs_mask_flags(vxi->vx_flags,
vc_data.flagword, mask);
- put_vx_info(vxi);
+ if (trigger & VXF_PERSISTENT)
+ vx_update_persistent(vxi);
+
return 0;
}
-int vc_get_ccaps(uint32_t id, void __user *data)
+/*	do_get_caps()
+
+	* copy the context's bcaps and/or ccaps into the given
+	  locations; a NULL pointer skips that value
+	* always returns 0					*/
+
+static int do_get_caps(struct vx_info *vxi, uint64_t *bcaps, uint64_t *ccaps)
+{
+	if (ccaps)
+		*ccaps = vxi->vx_ccaps;
+	if (bcaps)
+		*bcaps = vxi->vx_bcaps;
+
+	return 0;
+}
+
+int vc_get_ccaps_v0(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
struct vcmd_ctx_caps_v0 vc_data;
+ int ret;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ ret = do_get_caps(vxi, &vc_data.bcaps, &vc_data.ccaps);
+ if (ret)
+ return ret;
+ vc_data.cmask = ~0UL;
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
+ if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+ return -EFAULT;
+ return 0;
+}
- vc_data.bcaps = vxi->vx_bcaps;
- vc_data.ccaps = vxi->vx_ccaps;
+int vc_get_ccaps(struct vx_info *vxi, void __user *data)
+{
+ struct vcmd_ctx_caps_v1 vc_data;
+ int ret;
+
+ ret = do_get_caps(vxi, NULL, &vc_data.ccaps);
+ if (ret)
+ return ret;
vc_data.cmask = ~0UL;
- put_vx_info(vxi);
if (copy_to_user (data, &vc_data, sizeof(vc_data)))
return -EFAULT;
return 0;
}
-int vc_set_ccaps(uint32_t id, void __user *data)
+/*	do_set_caps()
+
+	* update the context's bcaps and ccaps, each through
+	  vs_mask_flags() with its own value/mask pair
+	  (presumably only bits set in the mask are changed --
+	  confirm against vs_mask_flags())
+	* always returns 0					*/
+
+static int do_set_caps(struct vx_info *vxi,
+	uint64_t bcaps, uint64_t bmask, uint64_t ccaps, uint64_t cmask)
+{
+	uint64_t new_bcaps = vs_mask_flags(vxi->vx_bcaps, bcaps, bmask);
+	uint64_t new_ccaps = vs_mask_flags(vxi->vx_ccaps, ccaps, cmask);
+
+	vxi->vx_bcaps = new_bcaps;
+	vxi->vx_ccaps = new_ccaps;
+
+	return 0;
+}
+
+int vc_set_ccaps_v0(struct vx_info *vxi, void __user *data)
{
- struct vx_info *vxi;
struct vcmd_ctx_caps_v0 vc_data;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- vxi = locate_vx_info(id);
- if (!vxi)
- return -ESRCH;
-
- vxi->vx_bcaps &= vc_data.bcaps;
- vxi->vx_ccaps = vx_mask_flags(vxi->vx_ccaps,
+ /* simulate old &= behaviour for bcaps */
+ return do_set_caps(vxi, 0, ~vc_data.bcaps,
vc_data.ccaps, vc_data.cmask);
- put_vx_info(vxi);
+}
+
+/*	vc_set_ccaps()
+
+	* copy a vcmd_ctx_caps_v1 from user space and pass its
+	  ccaps/cmask to do_set_caps(); bcaps value and mask
+	  are passed as zero
+	* returns -EFAULT if the user copy fails, otherwise
+	  the result of do_set_caps()				*/
+
+int vc_set_ccaps(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_ctx_caps_v1 vc_data;
+	int ret;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = do_set_caps(vxi, 0, 0, vc_data.ccaps, vc_data.cmask);
+	return ret;
+}
+
+/*	vc_get_bcaps()
+
+	* report the context's bcaps to user space together
+	  with a mask that has every bit set
+	* returns -EFAULT if the user copy fails, 0 on success	*/
+
+int vc_get_bcaps(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_bcaps vc_data;
+	int ret;
+
+	ret = do_get_caps(vxi, &vc_data.bcaps, NULL);
+	if (ret)
+		return ret;
+	/* ~0UL is only as wide as unsigned long; if bmask is a
+	   64 bit field, a 32 bit kernel would leave its upper
+	   half clear. ~0ULL is all ones at any field width */
+	vc_data.bmask = ~0ULL;
+
+	if (copy_to_user (data, &vc_data, sizeof(vc_data)))
+		return -EFAULT;
return 0;
}
+/*	vc_set_bcaps()
+
+	* copy a vcmd_bcaps from user space and pass its
+	  bcaps/bmask to do_set_caps(); ccaps value and mask
+	  are passed as zero
+	* returns -EFAULT if the user copy fails, otherwise
+	  the result of do_set_caps()				*/
+
+int vc_set_bcaps(struct vx_info *vxi, void __user *data)
+{
+	struct vcmd_bcaps vc_data;
+	int ret;
+
+	if (copy_from_user (&vc_data, data, sizeof(vc_data)))
+		return -EFAULT;
+
+	ret = do_set_caps(vxi, vc_data.bcaps, vc_data.bmask, 0, 0);
+	return ret;
+}
+
#include <linux/module.h>
-EXPORT_SYMBOL_GPL(rcu_free_vx_info);
-EXPORT_SYMBOL_GPL(vx_info_hash_lock);
+EXPORT_SYMBOL_GPL(free_vx_info);