patch-2.6.6-vs1.9.1
author Mark Huang <mlhuang@cs.princeton.edu>
Wed, 2 Jun 2004 21:13:46 +0000 (21:13 +0000)
committer Mark Huang <mlhuang@cs.princeton.edu>
Wed, 2 Jun 2004 21:13:46 +0000 (21:13 +0000)
21 files changed:
Makefile
fs/ext2/ioctl.c
fs/ext3/ioctl.c
fs/namespace.c
fs/proc/array.c
fs/reiserfs/ioctl.c
include/linux/ninline.h
include/linux/vinline.h
include/linux/vserver/context.h
include/linux/vserver/network.h
include/linux/vserver/sched.h
kernel/fork.c
kernel/vserver/context.c
kernel/vserver/init.c
kernel/vserver/legacy.c
kernel/vserver/limit.c
kernel/vserver/namespace.c
kernel/vserver/network.c
kernel/vserver/proc.c
kernel/vserver/sched.c
kernel/vserver/signal.c

index 1511e96..78f0c1b 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 6
-EXTRAVERSION = -vs1.9.0
+EXTRAVERSION = -vs1.9.1
 NAME=Zonked Quokka
 
 # *DOCUMENTATION*
index 101055b..11ee1a2 100644 (file)
@@ -49,7 +49,9 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
+               if ((oldflags & EXT2_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) &
+                       (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE))
                                return -EPERM;
                }
index 82c325f..21080fb 100644 (file)
@@ -58,7 +58,9 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                 *
                 * This test looks nicer. Thanks to Pauline Middelink
                 */
-               if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) {
+               if ((oldflags & EXT3_IMMUTABLE_FL) ||
+                       ((flags ^ oldflags) &
+                       (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL))) {
                        if (!capable(CAP_LINUX_IMMUTABLE))
                                return -EPERM;
                }
index dfeac21..fa8b30e 100644 (file)
@@ -691,7 +691,7 @@ static int do_add_mount(struct nameidata *nd, char *type, int flags,
                return -EINVAL;
 
        /* we need capabilities... */
-       if (!capable(CAP_SYS_ADMIN))
+       if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT))
                return -EPERM;
 
        mnt = do_kern_mount(type, flags, name, data);
index 4a2cce7..c210ba3 100644 (file)
@@ -301,7 +301,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
        vxi = task_get_vx_info(task);
        if (vxi) {
                buffer += sprintf (buffer,"ctxflags: %08llx\n"
-                       ,vxi->vx_flags);
+                       ,(unsigned long long)vxi->vx_flags);
                buffer += sprintf (buffer,"initpid: %d\n"
                        ,vxi->vx_initpid);
        } else {
@@ -322,8 +322,6 @@ int proc_pid_status(struct task_struct *task, char * buffer)
                *buffer++ = '\n';
                buffer += sprintf (buffer,"ipv4root_bcast: %08x\n"
                        ,nxi->v4_bcast);
-               buffer += sprintf (buffer,"ipv4root_refcnt: %d\n"
-                       ,atomic_read(&nxi->nx_refcount));
        } else {
                buffer += sprintf (buffer,"ipv4root: 0\n");
                buffer += sprintf (buffer,"ipv4root_bcast: 0\n");
index eaddaf9..0e225be 100644 (file)
@@ -49,9 +49,12 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
                        return -EFAULT;
 
                oldflags = REISERFS_I(inode) -> i_attrs;
-               if ( ( ( flags ^ oldflags) &
-                  ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) &&
-                    !capable( CAP_LINUX_IMMUTABLE ) )
+               /* CAP_LINUX_IMMUTABLE is required whenever the inode is
+                * already immutable or a protected flag would change;
+                * the capability check must guard both conditions. */
+               if ( ( (oldflags & REISERFS_IMMUTABLE_FL) || ( (flags ^ oldflags) &
+                  (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) ) &&
+                    !capable( CAP_LINUX_IMMUTABLE ) )
                        return -EPERM;
                        
                if( ( flags & REISERFS_NOTAIL_FL ) &&
index d3f7525..d07fc84 100644 (file)
@@ -5,6 +5,7 @@
 // #define NX_DEBUG
 
 #include <linux/kernel.h>
+#include <linux/rcupdate.h>
 #include <linux/sched.h>
 
 #include "vserver/network.h"
 #endif
 
 
-void free_nx_info(struct nx_info *);
-
 extern int proc_pid_nx_info(struct task_struct *, char *);
 
 
 #define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__)
 
-static __inline__ struct nx_info *__get_nx_info(struct nx_info *nxi, const char *_file, int _line)
+static inline struct nx_info *__get_nx_info(struct nx_info *nxi,
+       const char *_file, int _line)
 {
        if (!nxi)
                return NULL;
-       nxdprintk("get_nx_info(%p[%d.%d])\t%s:%d\n",
-               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+       nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
                _file, _line);
-       atomic_inc(&nxi->nx_refcount);
+       atomic_inc(&nxi->nx_usecnt);
        return nxi;
 }
 
+
+/* hand the nx_info to call_rcu(); rcu_free_nx_info() is the callback.
+ * Expands to a single expression, so the caller supplies the ';'. */
+#define        free_nx_info(nxi)       \
+       call_rcu(&(nxi)->nx_rcu, rcu_free_nx_info, (nxi))
+
 #define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__)
 
-static __inline__ void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
+static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line)
 {
        if (!nxi)
                return;
-       nxdprintk("put_nx_info(%p[%d.%d])\t%s:%d\n",
-               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+       nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0,
                _file, _line);
-       if (atomic_dec_and_lock(&nxi->nx_refcount, &nxlist_lock)) {
-               list_del(&nxi->nx_list);
-               spin_unlock(&nxlist_lock);
+       if (atomic_dec_and_test(&nxi->nx_usecnt))
                free_nx_info(nxi);
-       }
 }
 
 
@@ -59,9 +60,12 @@ static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi,
        BUG_ON(*nxp);
        if (!nxi)
                return;
-       nxdprintk("set_nx_info(%p[#%d.%d])\t%s:%d\n",
-               nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0,
+       nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               nxi, nxi?nxi->nx_id:0,
+               nxi?atomic_read(&nxi->nx_usecnt):0,
+               nxi?atomic_read(&nxi->nx_refcnt):0,
                _file, _line);
+       atomic_inc(&nxi->nx_refcnt);
        *nxp = __get_nx_info(nxi, _file, _line);
 }
 
@@ -74,11 +78,15 @@ static inline void __clr_nx_info(struct nx_info **nxp,
 
        if (!nxo)
                return;
-       nxdprintk("clr_nx_info(%p[#%d.%d])\t%s:%d\n",
-               nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_refcount):0,
+       nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               nxo, nxo?nxo->nx_id:0,
+               nxo?atomic_read(&nxo->nx_usecnt):0,
+               nxo?atomic_read(&nxo->nx_refcnt):0,
                _file, _line);
        *nxp = NULL;
        wmb();
+       if (nxo && atomic_dec_and_test(&nxo->nx_refcnt))
+               unhash_nx_info(nxo);
        __put_nx_info(nxo, _file, _line);
 }
 
index 07bb369..ce2bf36 100644 (file)
@@ -5,6 +5,7 @@
 // #define VX_DEBUG
 
 #include <linux/kernel.h>
+#include <linux/rcupdate.h>
 #include <linux/sched.h>
 
 #include "vserver/context.h"
 
 
 
-void free_vx_info(struct vx_info *);
-
 extern int proc_pid_vx_info(struct task_struct *, char *);
 
 
 #define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__)
 
-static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi,
+static inline struct vx_info *__get_vx_info(struct vx_info *vxi,
        const char *_file, int _line)
 {
        if (!vxi)
                return NULL;
        vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n",
-               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
-       atomic_inc(&vxi->vx_refcount);
+       atomic_inc(&vxi->vx_usecnt);
        return vxi;
 }
 
+
+/* hand the vx_info to call_rcu(); rcu_free_vx_info() is the callback.
+ * Expands to a single expression, so the caller supplies the ';'. */
+#define        free_vx_info(vxi)       \
+       call_rcu(&(vxi)->vx_rcu, rcu_free_vx_info, (vxi))
+
 #define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__)
 
-static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
+static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line)
 {
        if (!vxi)
                return;
        vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n",
-               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0,
                _file, _line);
-       if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) {
-               list_del(&vxi->vx_list);
-               spin_unlock(&vxlist_lock);
+       if (atomic_dec_and_test(&vxi->vx_usecnt))
                free_vx_info(vxi);
-       }
 }
 
 #define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__)
@@ -62,9 +62,12 @@ static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi,
        BUG_ON(*vxp);
        if (!vxi)
                return;
-       vxdprintk("set_vx_info(%p[#%d.%d])\t%s:%d\n",
-               vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0,
+       vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               vxi, vxi?vxi->vx_id:0,
+               vxi?atomic_read(&vxi->vx_usecnt):0,
+               vxi?atomic_read(&vxi->vx_refcnt):0,
                _file, _line);
+       atomic_inc(&vxi->vx_refcnt);
        *vxp = __get_vx_info(vxi, _file, _line);
 }
 
@@ -75,11 +78,17 @@ static inline void __clr_vx_info(struct vx_info **vxp,
 {
        struct vx_info *vxo = *vxp;
 
-       vxdprintk("clr_vx_info(%p[#%d.%d])\t%s:%d\n",
-               vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_refcount):0,
+       if (!vxo)
+               return;
+       vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n",
+               vxo, vxo?vxo->vx_id:0,
+               vxo?atomic_read(&vxo->vx_usecnt):0,
+               vxo?atomic_read(&vxo->vx_refcnt):0,
                _file, _line);
        *vxp = NULL;
        wmb();
+       if (vxo && atomic_dec_and_test(&vxo->vx_refcnt))
+               unhash_vx_info(vxo);
        __put_vx_info(vxo, _file, _line);
 }
 
index 7692603..fffb3b9 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
+#include <linux/rcupdate.h>
 
 #define _VX_INFO_DEF_
 #include "cvirt.h"
 #undef _VX_INFO_DEF_
 
 struct vx_info {
-       struct list_head vx_list;               /* linked list of contexts */
+       struct hlist_node vx_hlist;             /* linked list of contexts */
+       struct rcu_head vx_rcu;                 /* the rcu head */
        xid_t vx_id;                            /* context id */
-       atomic_t vx_refcount;                   /* refcount */
+       atomic_t vx_usecnt;                     /* usage count */
+       atomic_t vx_refcnt;                     /* reference count */
        struct vx_info *vx_parent;              /* parent context */
 
        struct namespace *vx_namespace;         /* private namespace */
@@ -42,10 +45,6 @@ struct vx_info {
 };
 
 
-extern spinlock_t vxlist_lock;
-extern struct list_head vx_infos;
-
-
 #define VX_ADMIN       0x0001
 #define VX_WATCH       0x0002
 #define VX_DUMMY       0x0008
@@ -63,11 +62,14 @@ extern struct list_head vx_infos;
 #define VX_ATR_MASK    0x0F00
 
 
-void free_vx_info(struct vx_info *);
+extern void rcu_free_vx_info(void *);
+extern void unhash_vx_info(struct vx_info *);
+
+extern struct vx_info *locate_vx_info(int);
+extern struct vx_info *locate_or_create_vx_info(int);
 
-extern struct vx_info *find_vx_info(int);
-extern struct vx_info *find_or_create_vx_info(int);
-extern int vx_info_id_valid(int);
+extern int get_xid_list(int, unsigned int *, int);
+extern int vx_info_is_hashed(xid_t);
 
 extern int vx_migrate_task(struct task_struct *, struct vx_info *);
 
index b3c39b0..086f566 100644 (file)
@@ -3,7 +3,7 @@
 
 #define MAX_N_CONTEXT  65535   /* Arbitrary limit */
 
-#define IP_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
+#define NX_DYNAMIC_ID  ((uint32_t)-1)          /* id for dynamic context */
 
 #define NB_IPV4ROOT    16
 
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/utsname.h>
+#include <linux/rcupdate.h>
 #include <asm/resource.h>
 #include <asm/atomic.h>
 
 
 struct nx_info {
-       struct list_head nx_list;       /* linked list of nxinfos */
+       struct hlist_node nx_hlist;     /* linked list of nxinfos */
+       struct rcu_head nx_rcu;         /* the rcu head */
        nid_t nx_id;                    /* vnet id */
-       atomic_t nx_refcount;
+       atomic_t nx_usecnt;             /* usage count */
+       atomic_t nx_refcnt;             /* reference count */
 
        uint64_t nx_flags;              /* network flag word */
        uint64_t nx_ncaps;              /* network capabilities */
@@ -38,15 +41,16 @@ struct nx_info {
 };
 
 
-extern spinlock_t nxlist_lock;
-extern struct list_head nx_infos;
+extern void rcu_free_nx_info(void *);
+extern void unhash_nx_info(struct nx_info *);
 
+extern struct nx_info *locate_nx_info(int);
+extern struct nx_info *locate_or_create_nx_info(int);
 
-void free_nx_info(struct nx_info *);
-struct nx_info *create_nx_info(void);
+extern int get_nid_list(int, unsigned int *, int);
+extern int nx_info_is_hashed(nid_t);
 
-extern struct nx_info *find_nx_info(int);
-extern int nx_info_id_valid(int);
+extern int nx_migrate_task(struct task_struct *, struct nx_info *);
 
 struct in_ifaddr;
 struct net_device;
index d1a2068..0f44693 100644 (file)
@@ -24,15 +24,15 @@ struct _vx_sched {
 
 static inline void vx_info_init_sched(struct _vx_sched *sched)
 {
-        /* scheduling; hard code starting values as constants */
-        sched->fill_rate       = 1;
-        sched->interval                = 4;
-        sched->tokens_min      = HZ >> 4;
-        sched->tokens_max      = HZ >> 1;
-        sched->jiffies         = jiffies;
-        sched->tokens_lock     = SPIN_LOCK_UNLOCKED;
-
-        atomic_set(&sched->tokens, HZ >> 2);
+       /* scheduling; hard code starting values as constants */
+       sched->fill_rate        = 1;
+       sched->interval         = 4;
+       sched->tokens_min       = HZ >> 4;
+       sched->tokens_max       = HZ >> 1;
+       sched->jiffies          = jiffies;
+       sched->tokens_lock      = SPIN_LOCK_UNLOCKED;
+
+       atomic_set(&sched->tokens, HZ >> 2);
        sched->cpus_allowed     = CPU_MASK_ALL;
 }
 
@@ -50,7 +50,7 @@ static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer)
                "Interval:\t%8d\n"              
                "TokensMin:\t%8d\n"
                "TokensMax:\t%8d\n"
-               ,sched->ticks
+               ,(unsigned long long)sched->ticks
                ,atomic_read(&sched->tokens)
                ,sched->fill_rate
                ,sched->interval
@@ -119,9 +119,9 @@ static inline int vx_need_resched(struct task_struct *p)
                int tokens;
 
                p->time_slice--;
-               if (atomic_read(&vxi->vx_refcount) < 1)
+               if (atomic_read(&vxi->vx_usecnt) < 1)
                        printk("need_resched: p=%p, s=%ld, ref=%d, id=%d/%d\n",
-                               p, p->state, atomic_read(&vxi->vx_refcount),
+                               p, p->state, atomic_read(&vxi->vx_usecnt),
                                vxi->vx_id, p->xid);
                if ((tokens = vx_tokens_avail(vxi)) > 0)
                        vx_consume_token(vxi);
index 4336cf0..fa3f828 100644 (file)
@@ -77,6 +77,7 @@ static kmem_cache_t *task_struct_cachep;
 static void free_task(struct task_struct *tsk)
 {
        free_thread_info(tsk->thread_info);
+       vxdprintk("freeing up task %p\n", tsk);
        clr_vx_info(&tsk->vx_info);
        clr_nx_info(&tsk->nx_info);
        free_task_struct(tsk);
@@ -869,7 +870,6 @@ struct task_struct *copy_process(unsigned long clone_flags,
        int retval;
        struct task_struct *p = NULL;
        struct vx_info *vxi;
-       struct nx_info *nxi;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -899,8 +899,10 @@ struct task_struct *copy_process(unsigned long clone_flags,
        if (!p)
                goto fork_out;
 
-       vxi = get_vx_info(current->vx_info);
-       nxi = get_nx_info(current->nx_info);
+       p->vx_info = NULL;
+       set_vx_info(&p->vx_info, current->vx_info);
+       p->nx_info = NULL;
+       set_nx_info(&p->nx_info, current->nx_info);
 
        /* check vserver memory */
        if (p->mm && !(clone_flags & CLONE_VM)) {
@@ -915,6 +917,7 @@ struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        retval = -EAGAIN;
+       vxi = current->vx_info;
        if (vxi && (atomic_read(&vxi->limit.res[RLIMIT_NPROC])
                >= vxi->limit.rlim[RLIMIT_NPROC]))
                goto bad_fork_free;
index 538834c..8b3cee7 100644 (file)
@@ -12,6 +12,7 @@
  *  V0.05  rlimit basic implementation
  *  V0.06  task_xid and info commands
  *  V0.07  context flags and caps
+ *  V0.08  switch to RCU based hash
  *
  */
 
 #include <linux/vinline.h>
 #include <linux/kernel_stat.h>
 #include <linux/namespace.h>
+#include <linux/rcupdate.h>
 
 #include <asm/errno.h>
 
 
-/*  system functions */
+/*     __alloc_vx_info()
 
+       * allocate an initialized vx_info struct
+       * doesn't make it visible (hash)                        */
 
-LIST_HEAD(vx_infos);
-
-spinlock_t vxlist_lock
-       __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-
-
-/*
- *     struct vx_info allocation and deallocation
- */
-
-static struct vx_info *alloc_vx_info(int id)
+static struct vx_info *__alloc_vx_info(xid_t xid)
 {
        struct vx_info *new = NULL;
        
-       vxdprintk("alloc_vx_info(%d)\n", id);
+       vxdprintk("alloc_vx_info(%d)\n", xid);
+
        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct vx_info), GFP_KERNEL);
        if (!new)
                return 0;
 
        memset (new, 0, sizeof(struct vx_info));
-       new->vx_id = id;
-       INIT_LIST_HEAD(&new->vx_list);
+       new->vx_id = xid;
+       INIT_RCU_HEAD(&new->vx_rcu);
+       INIT_HLIST_NODE(&new->vx_hlist);
+       atomic_set(&new->vx_refcnt, 0);
+       atomic_set(&new->vx_usecnt, 0);
+
        /* rest of init goes here */
-       
        vx_info_init_limit(&new->limit);
        vx_info_init_sched(&new->sched);
        vx_info_init_cvirt(&new->cvirt);
@@ -63,13 +61,21 @@ static struct vx_info *alloc_vx_info(int id)
        new->vx_bcaps = CAP_INIT_EFF_SET;
        new->vx_ccaps = 0;
 
-       vxdprintk("alloc_vx_info(%d) = %p\n", id, new);
+       vxdprintk("alloc_vx_info(%d) = %p\n", xid, new);
        return new;
 }
 
-void free_vx_info(struct vx_info *vxi)
+/*     __dealloc_vx_info()
+
+       * final disposal of vx_info                             */
+
+static void __dealloc_vx_info(struct vx_info *vxi)
 {
-       vxdprintk("free_vx_info(%p)\n", vxi);
+       vxdprintk("dealloc_vx_info(%p)\n", vxi);
+
+       vxi->vx_hlist.next = LIST_POISON1;
+       vxi->vx_id = -1;
+
        if (vxi->vx_namespace)
                put_namespace(vxi->vx_namespace);
        if (vxi->vx_fs)
@@ -80,69 +86,85 @@ void free_vx_info(struct vx_info *vxi)
        vx_info_exit_cvirt(&vxi->cvirt);
        vx_info_exit_cacct(&vxi->cacct);
        
-       BUG_ON(atomic_read(&vxi->vx_refcount));
-       vxi->vx_id = -1;
+       BUG_ON(atomic_read(&vxi->vx_usecnt));
+       BUG_ON(atomic_read(&vxi->vx_refcnt));
 
        kfree(vxi);
 }
 
 
-/*
- *     struct vx_info search by id
- *     assumes vxlist_lock is held
- */
+/*     hash table for vx_info hash */
 
-static __inline__ struct vx_info *__find_vx_info(int id)
-{
-       struct vx_info *vxi;
+#define        VX_HASH_SIZE    13
 
-       list_for_each_entry(vxi, &vx_infos, vx_list)
-               if (vxi->vx_id == id)
-                       return vxi;
-       return 0;
+struct hlist_head vx_info_hash[VX_HASH_SIZE];
+
+static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED;
+
+
+static inline unsigned int __hashval(xid_t xid)
+{
+       return (xid % VX_HASH_SIZE);
 }
 
 
-/*
- *     struct vx_info ref stuff
- */
 
-struct vx_info *find_vx_info(int id)
+/*     __hash_vx_info()
+
+       * add the vxi to the global hash table
+       * requires the hash_lock to be held                     */
+
+static inline void __hash_vx_info(struct vx_info *vxi)
 {
-       struct vx_info *vxi;
+       struct hlist_head *head;
        
-       if (id < 0) {
-               vxi = current->vx_info;
-               get_vx_info(vxi);
-       } else {
-               spin_lock(&vxlist_lock);
-               if ((vxi = __find_vx_info(id)))
-                       get_vx_info(vxi);
-               spin_unlock(&vxlist_lock);
-       }
-       return vxi;
+       vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
+       get_vx_info(vxi);
+       head = &vx_info_hash[__hashval(vxi->vx_id)];
+       hlist_add_head_rcu(&vxi->vx_hlist, head);
 }
 
-/*
- *     verify that id is a valid xid
- */
+/*     __unhash_vx_info()
+
+       * remove the vxi from the global hash table
+       * requires the hash_lock to be held                     */
 
-int vx_info_id_valid(int id)
+static inline void __unhash_vx_info(struct vx_info *vxi)
 {
-       int valid;
+       vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id);
+       hlist_del_rcu(&vxi->vx_hlist);
+       put_vx_info(vxi);
+}
+
 
-       spin_lock(&vxlist_lock);
-       valid = (__find_vx_info(id) != NULL);
-       spin_unlock(&vxlist_lock);
-       return valid;
+/*     __lookup_vx_info()
+
+       * requires the rcu_read_lock()
+       * doesn't increment the vx_refcnt                       */
+
+static inline struct vx_info *__lookup_vx_info(xid_t xid)
+{
+       struct hlist_head *head = &vx_info_hash[__hashval(xid)];
+       struct hlist_node *pos;
+
+       hlist_for_each(pos, head) {
+               struct vx_info *vxi =
+                       hlist_entry(pos, struct vx_info, vx_hlist);
+
+               if (vxi->vx_id == xid) {
+                       return vxi;
+               }
+       }
+       return NULL;
 }
 
 
-/*
- *     dynamic context id ...
- */
+/*     __vx_dynamic_id()
 
-static __inline__ xid_t __vx_dynamic_id(void)
+       * find unused dynamic xid
+       * requires the hash_lock to be held                     */
+
+static inline xid_t __vx_dynamic_id(void)
 {
        static xid_t seq = MAX_S_CONTEXT;
        xid_t barrier = seq;
@@ -150,29 +172,29 @@ static __inline__ xid_t __vx_dynamic_id(void)
        do {
                if (++seq > MAX_S_CONTEXT)
                        seq = MIN_D_CONTEXT;
-               if (!__find_vx_info(seq))
+               if (!__lookup_vx_info(seq))
                        return seq;
        } while (barrier != seq);
        return 0;
 }
 
-static struct vx_info * __foc_vx_info(int id, int *err)
+/*     __loc_vx_info()
+
+       * locate or create the requested context
+       * get() it and if new hash it                           */
+
+static struct vx_info * __loc_vx_info(int id, int *err)
 {
        struct vx_info *new, *vxi = NULL;
        
-       vxdprintk("foc_vx_info(%d)\n", id);
-       if (!(new = alloc_vx_info(id))) {
-               *err = -ENOMEM;
-               return NULL;
-       }
+       vxdprintk("loc_vx_info(%d)\n", id);
 
-       /* dirty hack until Spectator becomes a cap */
-       if (id == 0 || id == 1) {
-               *err = -EBUSY;
+       if (!(new = __alloc_vx_info(id))) {
+               *err = -ENOMEM;
                return NULL;
        }
 
-       spin_lock(&vxlist_lock);
+       spin_lock(&vx_info_hash_lock);
 
        /* dynamic context requested */
        if (id == VX_DYNAMIC_ID) {
@@ -184,14 +206,14 @@ static struct vx_info * __foc_vx_info(int id, int *err)
                new->vx_id = id;
        }
        /* existing context requested */
-       else if ((vxi = __find_vx_info(id))) {
+       else if ((vxi = __lookup_vx_info(id))) {
                /* context in setup is not available */
                if (vxi->vx_flags & VXF_STATE_SETUP) {
-                       vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi);
+                       vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi);
                        vxi = NULL;
                        *err = -EBUSY;
                } else {
-                       vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi);
+                       vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi);
                        get_vx_info(vxi);
                        *err = 0;
                }
@@ -199,27 +221,131 @@ static struct vx_info * __foc_vx_info(int id, int *err)
        }
 
        /* new context requested */
-       vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new);
-       atomic_set(&new->vx_refcount, 1);
-       list_add(&new->vx_list, &vx_infos);
+       vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new);
+       __hash_vx_info(get_vx_info(new));
        vxi = new, new = NULL;
        *err = 1;
 
 out_unlock:
-       spin_unlock(&vxlist_lock);
+       spin_unlock(&vx_info_hash_lock);
        if (new)
-               free_vx_info(new);
+               __dealloc_vx_info(new);
+       return vxi;
+}
+
+
+
+/*     exported stuff                                          */
+
+
+
+void rcu_free_vx_info(void *obj)
+{
+       struct vx_info *vxi = obj;
+       int usecnt, refcnt;
+
+       usecnt = atomic_read(&vxi->vx_usecnt);
+       BUG_ON(usecnt < 0);
+
+       refcnt = atomic_read(&vxi->vx_refcnt);
+       BUG_ON(refcnt < 0);
+
+       if (!usecnt)
+               __dealloc_vx_info(vxi);
+       else
+               printk("!!! rcu didn't free\n");
+}
+
+void unhash_vx_info(struct vx_info *vxi)
+{
+       spin_lock(&vx_info_hash_lock);
+       __unhash_vx_info(vxi);
+       spin_unlock(&vx_info_hash_lock);
+}
+
+/*     locate_vx_info()
+
+       * search for a vx_info and get() it                     
+       * negative id means current                             */
+
+struct vx_info *locate_vx_info(int id)
+{
+       struct vx_info *vxi;
+       
+       if (id < 0) {
+               vxi = get_vx_info(current->vx_info);
+       } else {
+               rcu_read_lock();
+               vxi = get_vx_info(__lookup_vx_info(id));
+               rcu_read_unlock();
+       }
        return vxi;
 }
 
+/*     vx_info_is_hashed()
+
+       * verify that xid is still hashed                       */
+
+int vx_info_is_hashed(xid_t xid)
+{
+       int hashed;
+
+       rcu_read_lock();
+       hashed = (__lookup_vx_info(xid) != NULL);
+       rcu_read_unlock();
+       return hashed;
+}
+
+#ifdef CONFIG_VSERVER_LEGACY
 
-struct vx_info *find_or_create_vx_info(int id)
+#if 0
+struct vx_info *alloc_vx_info(xid_t xid)
+{
+       return __alloc_vx_info(xid);
+}
+#endif
+
+struct vx_info *locate_or_create_vx_info(int id)
 {
        int err;
 
-       return __foc_vx_info(id, &err);
+       return __loc_vx_info(id, &err);
 }
 
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+#define hlist_for_each_rcu(pos, head) \
+        for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
+               pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
+
+int get_xid_list(int index, unsigned int *xids, int size)
+{
+       int hindex, nr_xids = 0;
+
+       rcu_read_lock();
+       for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) {
+               struct hlist_head *head = &vx_info_hash[hindex];
+               struct hlist_node *pos;
+
+               hlist_for_each_rcu(pos, head) {
+                       struct vx_info *vxi;
+
+                       if (--index > 0)
+                               continue;
+
+                       vxi = hlist_entry(pos, struct vx_info, vx_hlist);
+                       xids[nr_xids] = vxi->vx_id;                     
+                       if (++nr_xids >= size)
+                               goto out;
+               }
+       }
+out:
+       rcu_read_unlock();
+       return nr_xids;
+}
+#endif
 
 int vx_migrate_user(struct task_struct *p, struct vx_info *vxi)
 {
@@ -300,17 +426,19 @@ static inline int vx_openfd_task(struct task_struct *tsk)
 
 int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
 {
-       struct vx_info *old_vxi = task_get_vx_info(p);
+       struct vx_info *old_vxi;
        int ret = 0;
        
        if (!p || !vxi)
                BUG();
 
-       vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
-               vxi->vx_id, atomic_read(&vxi->vx_refcount));
+       old_vxi = task_get_vx_info(p);
        if (old_vxi == vxi)
                goto out;
 
+       vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi,
+               vxi->vx_id, atomic_read(&vxi->vx_usecnt));
+
        if (!(ret = vx_migrate_user(p, vxi))) {
                task_lock(p);
                if (old_vxi) {
@@ -321,6 +449,9 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi)
                atomic_inc(&vxi->limit.res[RLIMIT_NPROC]);
                atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]);
                atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]);
+               /* should be handled in set_vx_info !! */
+               if (old_vxi)
+                       clr_vx_info(&p->vx_info);
                set_vx_info(&p->vx_info, vxi);
                p->xid = vxi->vx_id;
                vx_mask_bcaps(p);
@@ -383,7 +514,7 @@ int vc_vx_info(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -401,7 +532,6 @@ int vc_vx_info(uint32_t id, void __user *data)
 
 int vc_ctx_create(uint32_t xid, void __user *data)
 {
-        // int ret = -ENOMEM;
        struct vx_info *new_vxi;
        int ret;
 
@@ -414,7 +544,7 @@ int vc_ctx_create(uint32_t xid, void __user *data)
        if (xid < 1)
                return -EINVAL;
 
-       new_vxi = __foc_vx_info(xid, &ret);
+       new_vxi = __loc_vx_info(xid, &ret);
        if (!new_vxi)
                return ret;
        if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) {
@@ -424,6 +554,7 @@ int vc_ctx_create(uint32_t xid, void __user *data)
 
        ret = new_vxi->vx_id;
        vx_migrate_task(current, new_vxi);
+       /* if this fails, we might end up with a hashed vx_info */
 out_put:
        put_vx_info(new_vxi);
        return ret;
@@ -443,7 +574,7 @@ int vc_ctx_migrate(uint32_t id, void __user *data)
                return 0;
        }
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
        vx_migrate_task(current, vxi);
@@ -460,13 +591,12 @@ int vc_get_cflags(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
        vc_data.flagword = vxi->vx_flags;
 
-       // vc_data.mask = ~0UL;
        /* special STATE flag handling */
        vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME);
 
@@ -488,7 +618,7 @@ int vc_set_cflags(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -516,7 +646,7 @@ int vc_get_ccaps(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -540,7 +670,7 @@ int vc_set_ccaps(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -553,6 +683,6 @@ int vc_set_ccaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-EXPORT_SYMBOL_GPL(free_vx_info);
-EXPORT_SYMBOL_GPL(vxlist_lock);
+EXPORT_SYMBOL_GPL(rcu_free_vx_info);
+EXPORT_SYMBOL_GPL(vx_info_hash_lock);
 
index 8afd1fc..4a20f26 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/config.h>
 #include <linux/errno.h>
 #include <linux/vserver.h>
-// #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
 
index a620ae3..8fe6fed 100644 (file)
@@ -74,9 +74,9 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
                return -EINVAL;
                
        if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT))
-               new_vxi = find_or_create_vx_info(ctx);
+               new_vxi = locate_or_create_vx_info(ctx);
        else
-               new_vxi = find_vx_info(ctx);
+               new_vxi = locate_vx_info(ctx);
 
        if (!new_vxi)
                return -EINVAL;
@@ -102,6 +102,7 @@ int vc_new_s_context(uint32_t ctx, void __user *data)
 }
 
 
+extern struct nx_info *create_nx_info(void);
 
 /*  set ipv4 root (syscall) */
 
@@ -152,9 +153,15 @@ int vc_set_ipv4root(uint32_t nbip, void __user *data)
                new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask;
        }
        new_nxi->v4_bcast = vc_data.broadcast;
-       current->nx_info = new_nxi;
-       current->nid = new_nxi->nx_id;
-       put_nx_info(nxi);
+       // current->nx_info = new_nxi;
+       if (nxi) {
+               printk("!!! switching nx_info %p->%p\n", nxi, new_nxi);
+               clr_nx_info(&current->nx_info);
+       }
+       nx_migrate_task(current, new_nxi);
+       // set_nx_info(&current->nx_info, new_nxi);
+       // current->nid = new_nxi->nx_id;
+       put_nx_info(new_nxi);
        return 0;
 }
 
index 5bd2fdc..668918c 100644 (file)
@@ -55,7 +55,7 @@ int vc_get_rlimit(uint32_t id, void __user *data)
        if (!is_valid_rlimit(vc_data.id))
                return -ENOTSUPP;
                
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -81,7 +81,7 @@ int vc_set_rlimit(uint32_t id, void __user *data)
        if (!is_valid_rlimit(vc_data.id))
                return -ENOTSUPP;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
index 2c76c6f..f1c95c4 100644 (file)
@@ -57,7 +57,7 @@ int vc_set_vhi_name(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
        
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
        
@@ -77,7 +77,7 @@ int vc_get_vhi_name(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -126,7 +126,7 @@ int vc_enter_namespace(uint32_t id, void *data)
        if (!vx_check(0, VX_ADMIN))
                return -ENOSYS;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;
 
@@ -158,11 +158,9 @@ out_put:
 int vc_cleanup_namespace(uint32_t id, void *data)
 {
        down_write(&current->namespace->sem);
-       // spin_lock(&dcache_lock);
        spin_lock(&vfsmount_lock);
        umount_unused(current->namespace->root, current->fs);
        spin_unlock(&vfsmount_lock);
-       // spin_unlock(&dcache_lock);
        up_write(&current->namespace->sem);
        return 0;
 }
index 479a19b..b37b0ac 100644 (file)
@@ -8,6 +8,7 @@
  *  V0.01  broken out from vcontext V0.05
  *  V0.02  cleaned up implementation
  *  V0.03  added equiv nx commands
+ *  V0.04  switch to RCU based hash
  *
  */
 
 #include <linux/slab.h>
 #include <linux/vserver/network.h>
 #include <linux/ninline.h>
+#include <linux/rcupdate.h>
 
 #include <asm/errno.h>
 
 
-LIST_HEAD(nx_infos);
+/*     __alloc_nx_info()
 
-spinlock_t nxlist_lock
-       __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+       * allocate an initialized nx_info struct
+       * doesn't make it visible (hash)                        */
 
-
-/*
- *     struct nx_info allocation and deallocation
- */
-
-static struct nx_info *alloc_nx_info(void)
+static struct nx_info *__alloc_nx_info(nid_t nid)
 {
        struct nx_info *new = NULL;
        
        nxdprintk("alloc_nx_info()\n");
+
        /* would this benefit from a slab cache? */
        new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
        if (!new)
                return 0;
        
        memset (new, 0, sizeof(struct nx_info));
+       new->nx_id = nid;
+       INIT_RCU_HEAD(&new->nx_rcu);
+       INIT_HLIST_NODE(&new->nx_hlist);
+       atomic_set(&new->nx_refcnt, 0);
+       atomic_set(&new->nx_usecnt, 0);
+
        /* rest of init goes here */
        
        nxdprintk("alloc_nx_info() = %p\n", new);
        return new;
 }
 
-void free_nx_info(struct nx_info *nxi)
+/*     __dealloc_nx_info()
+
+       * final disposal of nx_info                             */
+
+static void __dealloc_nx_info(struct nx_info *nxi)
 {
-       nxdprintk("free_nx_info(%p)\n", nxi);
+       nxdprintk("dealloc_nx_info(%p)\n", nxi);
+
+       nxi->nx_hlist.next = LIST_POISON1;
+       nxi->nx_id = -1;
+       
+       BUG_ON(atomic_read(&nxi->nx_usecnt));
+       BUG_ON(atomic_read(&nxi->nx_refcnt));
+
        kfree(nxi);
 }
 
-struct nx_info *create_nx_info(void)
-{
-       struct nx_info *new;
-       static int gnid = 1;
-       
-       nxdprintk("create_nx_info()\n");
-       if (!(new = alloc_nx_info()))
-               return 0;
 
-       spin_lock(&nxlist_lock);
+/*     hash table for nx_info hash */
 
-       /* new ip info */
-       atomic_set(&new->nx_refcount, 1);
-       new->nx_id = gnid++;
-       list_add(&new->nx_list, &nx_infos);
+#define        NX_HASH_SIZE    13
 
-       spin_unlock(&nxlist_lock);
-       return new;
-}
+struct hlist_head nx_info_hash[NX_HASH_SIZE];
 
+static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
 
-/*
- *     struct nx_info search by id
- *     assumes nxlist_lock is held
- */
 
-static __inline__ struct nx_info *__find_nx_info(int id)
+static inline unsigned int __hashval(nid_t nid)
 {
-       struct nx_info *nxi;
-
-       list_for_each_entry(nxi, &nx_infos, nx_list)
-               if (nxi->nx_id == id)
-                       return nxi;
-       return 0;
+       return (nid % NX_HASH_SIZE);
 }
 
 
-/*
- *     struct nx_info ref stuff
- */
 
-struct nx_info *find_nx_info(int id)
+/*     __hash_nx_info()
+
+       * add the nxi to the global hash table
+       * requires the hash_lock to be held                     */
+
+static inline void __hash_nx_info(struct nx_info *nxi)
 {
-       struct nx_info *nxi;
+       struct hlist_head *head;
        
-       if (id < 0) {
-               nxi = current->nx_info;
-               get_nx_info(nxi);
-       } else {
-               spin_lock(&nxlist_lock);
-               if ((nxi = __find_nx_info(id)))
-                       get_nx_info(nxi);
-               spin_unlock(&nxlist_lock);
-       }
-       return nxi;
+       nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+       get_nx_info(nxi);
+       head = &nx_info_hash[__hashval(nxi->nx_id)];
+       hlist_add_head_rcu(&nxi->nx_hlist, head);
 }
 
-/*
- *      verify that id is a valid nid
- */
+/*     __unhash_nx_info()
+
+       * remove the nxi from the global hash table
+       * requires the hash_lock to be held                     */
 
-int nx_info_id_valid(int id)
+static inline void __unhash_nx_info(struct nx_info *nxi)
 {
-       int valid;
-       
-       spin_lock(&nxlist_lock);
-       valid = (__find_nx_info(id) != NULL);
-       spin_unlock(&nxlist_lock);
-       return valid;
+       nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+       hlist_del_rcu(&nxi->nx_hlist);
+       put_nx_info(nxi);
 }
 
 
-/*
- *     dynamic context id ...
- */
+/*     __lookup_nx_info()
 
-static __inline__ nid_t __nx_dynamic_id(void)
+       * requires the rcu_read_lock()
+       * doesn't increment the nx_refcnt                       */
+
+static inline struct nx_info *__lookup_nx_info(nid_t nid)
+{
+       struct hlist_head *head = &nx_info_hash[__hashval(nid)];
+       struct hlist_node *pos;
+
+       /* RCU readers: order the load of pos before reading its node */
+       for (pos = head->first; pos; pos = pos->next) {
+               struct nx_info *nxi;
+               smp_read_barrier_depends();
+               nxi = hlist_entry(pos, struct nx_info, nx_hlist);
+               if (nxi->nx_id == nid)
+                       return nxi;
+       }
+       return NULL;
+}
+
+
+/*     __nx_dynamic_id()
+
+       * find unused dynamic nid
+       * requires the hash_lock to be held                     */
+
+static inline nid_t __nx_dynamic_id(void)
 {
        static nid_t seq = MAX_N_CONTEXT;
        nid_t barrier = seq;
@@ -136,27 +147,32 @@ static __inline__ nid_t __nx_dynamic_id(void)
        do {
                if (++seq > MAX_N_CONTEXT)
                        seq = MIN_D_CONTEXT;
-               if (!__find_nx_info(seq))
+               if (!__lookup_nx_info(seq))
                        return seq;
        } while (barrier != seq);
        return 0;
 }
 
-static struct nx_info * __foc_nx_info(int id, int *err)
+/*     __loc_nx_info()
+
+       * locate or create the requested context
+       * get() it and if new hash it                           */
+
+static struct nx_info * __loc_nx_info(int id, int *err)
 {
        struct nx_info *new, *nxi = NULL;
        
-       nxdprintk("foc_nx_info(%d)\n", id);
-       // if (!(new = alloc_nx_info(id))) {
-       if (!(new = alloc_nx_info())) {
+       nxdprintk("loc_nx_info(%d)\n", id);
+
+       if (!(new = __alloc_nx_info(id))) {
                *err = -ENOMEM;
                return NULL;
        }
 
-       spin_lock(&nxlist_lock);
+       spin_lock(&nx_info_hash_lock);
 
        /* dynamic context requested */
-       if (id == IP_DYNAMIC_ID) {
+       if (id == NX_DYNAMIC_ID) {
                id = __nx_dynamic_id();
                if (!id) {
                        printk(KERN_ERR "no dynamic context available.\n");
@@ -165,14 +181,14 @@ static struct nx_info * __foc_nx_info(int id, int *err)
                new->nx_id = id;
        }
        /* existing context requested */
-       else if ((nxi = __find_nx_info(id))) {
+       else if ((nxi = __lookup_nx_info(id))) {
                /* context in setup is not available */
                if (nxi->nx_flags & VXF_STATE_SETUP) {
-                       nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi);
+                       nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi);
                        nxi = NULL;
                        *err = -EBUSY;
                } else {
-                       nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi);
+                       nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi);
                        get_nx_info(nxi);
                        *err = 0;
                }
@@ -180,27 +196,139 @@ static struct nx_info * __foc_nx_info(int id, int *err)
        }
 
        /* new context requested */
-       nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new);
-       atomic_set(&new->nx_refcount, 1);
-       list_add(&new->nx_list, &nx_infos);
+       nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new);
+       __hash_nx_info(get_nx_info(new));
        nxi = new, new = NULL;
        *err = 1;
 
 out_unlock:
-       spin_unlock(&nxlist_lock);
+       spin_unlock(&nx_info_hash_lock);
        if (new)
-               free_nx_info(new);
+               __dealloc_nx_info(new);
+       return nxi;
+}
+
+
+
+/*     exported stuff                                          */
+
+
+
+
+/* deferred release of an nx_info (named as the RCU callback):
+ * a struct with a non-zero usecnt is only reported, not freed */
+void rcu_free_nx_info(void *obj)
+{
+       struct nx_info *nxi = obj;
+       int users = atomic_read(&nxi->nx_usecnt);
+       int refs;
+       BUG_ON(users < 0);
+       refs = atomic_read(&nxi->nx_refcnt);
+       BUG_ON(refs < 0);
+       if (users) {
+               printk("!!! rcu didn't free\n");
+               return;
+       }
+       __dealloc_nx_info(nxi);
+}
+
+void unhash_nx_info(struct nx_info *nxi)
+{
+       spin_lock(&nx_info_hash_lock);          /* guards hash updates */
+       __unhash_nx_info(nxi);                  /* unlink + put hash ref */
+       spin_unlock(&nx_info_hash_lock);
+}
+
+/*     locate_nx_info()
+
+       * look up the nx_info hashed under id and get() it
+       * a negative id selects current->nx_info instead        */
+
+struct nx_info *locate_nx_info(int id)
+{
+       struct nx_info *nxi;
+
+       if (id >= 0) {
+               rcu_read_lock();
+               nxi = get_nx_info(__lookup_nx_info(id));
+               rcu_read_unlock();
+       } else {
+               nxi = get_nx_info(current->nx_info);
+       }
        return nxi;
 }
 
+/*     nx_info_is_hashed()
+
+       * report whether nid is currently found in the hash     */
+
+int nx_info_is_hashed(nid_t nid)
+{
+       struct nx_info *found;
+
+       rcu_read_lock();
+       found = __lookup_nx_info(nid);
+       rcu_read_unlock();
+       return (found != NULL);
+}
+
+#ifdef CONFIG_VSERVER_LEGACY
 
-struct nx_info *find_or_create_nx_info(int id)
+struct nx_info *locate_or_create_nx_info(int id)
 {
        int err;
 
-       return __foc_nx_info(id, &err);
+       return __loc_nx_info(id, &err);
 }
 
+/* legacy helper: set up a new nx_info under a dynamically chosen nid
+ * (NX_DYNAMIC_ID); yields NULL when __loc_nx_info() fails */
+struct nx_info *create_nx_info(void)
+{
+       int err;
+
+       nxdprintk("create_nx_info()\n");
+       /* err is mandatory for __loc_nx_info() but not reported here */
+       return __loc_nx_info(NX_DYNAMIC_ID, &err);
+}
+
+
+#endif
+
+#ifdef CONFIG_PROC_FS
+/* hlist walk for RCU readers: prefetch ahead, barrier after each step */
+#define hlist_for_each_rcu(pos, head) \
+        for ((pos) = (head)->first; (pos) && ({ prefetch((pos)->next); 1;}); \
+               (pos) = (pos)->next, ({ smp_read_barrier_depends(); 0;}))
+
+/* store up to size hashed nids in nids[], starting at the index-th */
+int get_nid_list(int index, unsigned int *nids, int size)
+{
+       int hindex, nr_nids = 0;
+
+       if (size <= 0)          /* no room, so never write nids[0] */
+               return 0;
+       rcu_read_lock();
+       for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
+               struct hlist_head *head = &nx_info_hash[hindex];
+               struct hlist_node *pos;
+               hlist_for_each_rcu(pos, head) {
+                       struct nx_info *nxi;
+                       if (--index > 0)
+                               continue;
+                       nxi = hlist_entry(pos, struct nx_info, nx_hlist);
+                       nids[nr_nids] = nxi->nx_id;
+                       if (++nr_nids >= size)
+                               goto out;
+               }
+       }
+out:
+       rcu_read_unlock();
+       return nr_nids;
+}
+#endif
+
+
 /*
  *     migrate task to new network
  */
@@ -213,17 +341,22 @@ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi)
        if (!p || !nxi)
                BUG();
 
-       nxdprintk("nx_migrate_task(%p,%p[#%d.%d)\n", p, nxi,
-               nxi->nx_id, atomic_read(&nxi->nx_refcount));
+       nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n",
+               p, nxi, nxi->nx_id,
+               atomic_read(&nxi->nx_usecnt),
+               atomic_read(&nxi->nx_refcnt));
        if (old_nxi == nxi)
                goto out;
 
        task_lock(p);
+       /* should be handled in set_nx_info !! */
+       if (old_nxi)
+               clr_nx_info(&p->nx_info);
        set_nx_info(&p->nx_info, nxi);
        p->nid = nxi->nx_id;
        task_unlock(p);
 
-       put_nx_info(old_nxi);
+       // put_nx_info(old_nxi);
 out:
        put_nx_info(old_nxi);
        return ret;
@@ -246,10 +379,9 @@ static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
 
 int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
 {
-       if (!nxi)
-               return 1;
-       
-       return __addr_in_nx_info(ifa->ifa_address, nxi);
+       if (nxi && ifa)
+               return __addr_in_nx_info(ifa->ifa_address, nxi);
+       return 1;
 }
 
 int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
@@ -312,7 +444,7 @@ int vc_nx_info(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
                return -EPERM;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -342,7 +474,7 @@ int vc_net_create(uint32_t nid, void __user *data)
        if (nid < 1)
                return -EINVAL;
 
-       new_nxi = __foc_nx_info(nid, &ret);
+       new_nxi = __loc_nx_info(nid, &ret);
        if (!new_nxi)
                return ret;
        if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
@@ -365,7 +497,7 @@ int vc_net_migrate(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
        nx_migrate_task(current, nxi);
@@ -383,7 +515,7 @@ int vc_net_add(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -402,7 +534,7 @@ int vc_net_remove(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -421,13 +553,12 @@ int vc_get_nflags(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
        vc_data.flagword = nxi->nx_flags;
 
-       // vc_data.mask = ~0UL;
        /* special STATE flag handling */
        vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
 
@@ -449,7 +580,7 @@ int vc_set_nflags(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -472,7 +603,7 @@ int vc_get_ncaps(uint32_t id, void __user *data)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -495,7 +626,7 @@ int vc_set_ncaps(uint32_t id, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
 
-       nxi = find_nx_info(id);
+       nxi = locate_nx_info(id);
        if (!nxi)
                return -ESRCH;
 
@@ -508,6 +639,6 @@ int vc_set_ncaps(uint32_t id, void __user *data)
 
 #include <linux/module.h>
 
-EXPORT_SYMBOL_GPL(free_nx_info);
-EXPORT_SYMBOL_GPL(nxlist_lock);
+EXPORT_SYMBOL_GPL(rcu_free_nx_info);
+EXPORT_SYMBOL_GPL(nx_info_hash_lock);
 
index 42bc182..3c98a53 100644 (file)
@@ -66,7 +66,7 @@ int proc_xid_info (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = sprintf(buffer,
@@ -86,19 +86,21 @@ int proc_xid_status (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = sprintf(buffer,
-               "RefC:\t%d\n"           
+               "UseCnt:\t%d\n"         
+               "RefCnt:\t%d\n"         
                "Flags:\t%016llx\n"
                "BCaps:\t%016llx\n"
                "CCaps:\t%016llx\n"
                "Ticks:\t%d\n"          
-               ,atomic_read(&vxi->vx_refcount)
-               ,vxi->vx_flags
-               ,vxi->vx_bcaps
-               ,vxi->vx_ccaps
+               ,atomic_read(&vxi->vx_usecnt)
+               ,atomic_read(&vxi->vx_refcnt)
+               ,(unsigned long long)vxi->vx_flags
+               ,(unsigned long long)vxi->vx_bcaps
+               ,(unsigned long long)vxi->vx_ccaps
                ,atomic_read(&vxi->limit.ticks)
                );
        put_vx_info(vxi);
@@ -110,7 +112,7 @@ int proc_xid_limit (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_limit(&vxi->limit, buffer);
@@ -123,7 +125,7 @@ int proc_xid_sched (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_sched(&vxi->sched, buffer);
@@ -136,7 +138,7 @@ int proc_xid_cvirt (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_cvirt(&vxi->cvirt, buffer);
@@ -149,7 +151,7 @@ int proc_xid_cacct (int vid, char *buffer)
        struct vx_info *vxi;
        int length;
 
-       vxi = find_vx_info(vid);
+       vxi = locate_vx_info(vid);
        if (!vxi)
                return 0;
        length = vx_info_proc_cacct(&vxi->cacct, buffer);
@@ -178,7 +180,7 @@ int proc_nid_info (int vid, char *buffer)
        struct nx_info *nxi;
        int length, i;
 
-       nxi = find_nx_info(vid);
+       nxi = locate_nx_info(vid);
        if (!nxi)
                return 0;
        length = sprintf(buffer,
@@ -202,12 +204,14 @@ int proc_nid_status (int vid, char *buffer)
        struct nx_info *nxi;
        int length;
 
-       nxi = find_nx_info(vid);
+       nxi = locate_nx_info(vid);
        if (!nxi)
                return 0;
        length = sprintf(buffer,
-               "RefC:\t%d\n"           
-               ,atomic_read(&nxi->nx_refcount)
+               "UseCnt:\t%d\n"         
+               "RefCnt:\t%d\n"         
+               ,atomic_read(&nxi->nx_usecnt)
+               ,atomic_read(&nxi->nx_refcnt)
                );
        put_nx_info(nxi);
        return length;
@@ -247,18 +251,18 @@ out:
 static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd)
 {
        struct inode * inode = dentry->d_inode;
-       int vid, valid=0;
+       int vid, hashed=0;
 
        vid = inode_vid(inode);
        switch (inode_type(inode) & PROC_VID_MASK) {
                case PROC_XID_INO:
-                       valid = vx_info_id_valid(vid);
+                       hashed = vx_info_is_hashed(vid);
                        break;
                case PROC_NID_INO:
-                       valid = nx_info_id_valid(vid);
+                       hashed = nx_info_is_hashed(vid);
                        break;
        }       
-       if (valid)
+       if (hashed)
                return 1;
        d_drop(dentry);
        return 0;
@@ -564,7 +568,7 @@ struct dentry *proc_virtual_lookup(struct inode *dir,
        xid = atovid(name, len);
        if (xid < 0)
                goto out;
-       vxi = find_vx_info(xid);
+       vxi = locate_vx_info(xid);
        if (!vxi)
                goto out;
 
@@ -634,7 +638,7 @@ struct dentry *proc_vnet_lookup(struct inode *dir,
        nid = atovid(name, len);
        if (nid < 0)
                goto out;
-       nxi = find_nx_info(nid);
+       nxi = locate_nx_info(nid);
        if (!nxi)
                goto out;
 
@@ -667,27 +671,6 @@ out:
 #define PROC_NUMBUF 10
 #define PROC_MAXVIDS 32
 
-
-static int get_xid_list(int index, unsigned int *xids)
-{
-       struct vx_info *p;
-       int nr_xids = 0;
-
-       index--;
-       spin_lock(&vxlist_lock);
-       list_for_each_entry(p, &vx_infos, vx_list) {
-               int xid = p->vx_id;
-
-               if (--index >= 0)
-                       continue;
-               xids[nr_xids] = xid;
-               if (++nr_xids >= PROC_MAXVIDS)
-                       break;
-       }
-       spin_unlock(&vxlist_lock);
-       return nr_xids;
-}
-
 int proc_virtual_readdir(struct file * filp,
        void * dirent, filldir_t filldir)
 {
@@ -729,12 +712,11 @@ int proc_virtual_readdir(struct file * filp,
                        filp->f_pos++;
        }
 
-       nr_xids = get_xid_list(nr, xid_array);
-
+       nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS);
        for (i = 0; i < nr_xids; i++) {
                int xid = xid_array[i];
                ino_t ino = fake_ino(xid, PROC_XID_INO);
-               unsigned long j = PROC_NUMBUF;
+               unsigned int j = PROC_NUMBUF;
 
                do buf[--j] = '0' + (xid % 10); while (xid/=10);
 
@@ -757,27 +739,6 @@ static struct inode_operations proc_virtual_dir_inode_operations = {
 };
 
 
-
-static int get_nid_list(int index, unsigned int *nids)
-{
-       struct nx_info *p;
-       int nr_nids = 0;
-
-       index--;
-       spin_lock(&nxlist_lock);
-       list_for_each_entry(p, &nx_infos, nx_list) {
-               int nid = p->nx_id;
-
-               if (--index >= 0)
-                       continue;
-               nids[nr_nids] = nid;
-               if (++nr_nids >= PROC_MAXVIDS)
-                       break;
-       }
-       spin_unlock(&nxlist_lock);
-       return nr_nids;
-}
-
 int proc_vnet_readdir(struct file * filp,
        void * dirent, filldir_t filldir)
 {
@@ -819,8 +780,7 @@ int proc_vnet_readdir(struct file * filp,
                        filp->f_pos++;
        }
 
-       nr_nids = get_nid_list(nr, nid_array);
-
+       nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS);
        for (i = 0; i < nr_nids; i++) {
                int nid = nid_array[i];
                ino_t ino = fake_ino(nid, PROC_NID_INO);
index a75195a..9284bf4 100644 (file)
@@ -126,7 +126,7 @@ int vc_set_sched(uint32_t xid, void __user *data)
        if (copy_from_user (&vc_data, data, sizeof(vc_data)))
                return -EFAULT;
        
-       vxi = find_vx_info(xid);
+       vxi = locate_vx_info(xid);
        if (!vxi)
                return -EINVAL;
 
index 464ea1b..200eba8 100644 (file)
@@ -38,7 +38,7 @@ int vc_ctx_kill(uint32_t id, void __user *data)
        info.si_pid = current->pid;
        info.si_uid = current->uid;
 
-       vxi = find_vx_info(id);
+       vxi = locate_vx_info(id);
        if (!vxi)
                return -ESRCH;