From: Mark Huang Date: Wed, 2 Jun 2004 21:13:46 +0000 (+0000) Subject: patch-2.6.6-vs1.9.1 X-Git-Tag: vserver-2_6_6-vs1_9_1~1 X-Git-Url: http://git.onelab.eu/?p=linux-2.6.git;a=commitdiff_plain;h=d46bc780027c5439db9f72d42c0732775b53925a patch-2.6.6-vs1.9.1 --- diff --git a/Makefile b/Makefile index 1511e96ae..78f0c1b54 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 6 -EXTRAVERSION = -vs1.9.0 +EXTRAVERSION = -vs1.9.1 NAME=Zonked Quokka # *DOCUMENTATION* diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 101055bbf..11ee1a253 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -49,7 +49,9 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { + if ((oldflags & EXT2_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 82c325f55..21080fbbb 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -58,7 +58,9 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((oldflags & EXT3_IMMUTABLE_FL) || + ((flags ^ oldflags) & + (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL))) { if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; } diff --git a/fs/namespace.c b/fs/namespace.c index dfeac2193..fa8b30e28 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -691,7 +691,7 @@ static int do_add_mount(struct nameidata *nd, char *type, int flags, return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN) && !vx_ccaps(VXC_SECURE_MOUNT)) return -EPERM; mnt = do_kern_mount(type, flags, name, data); diff --git a/fs/proc/array.c b/fs/proc/array.c index 4a2cce73e..c210ba307 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -301,7 +301,7 @@ int proc_pid_status(struct task_struct *task, char * buffer) vxi = task_get_vx_info(task); if (vxi) { buffer += sprintf (buffer,"ctxflags: %08llx\n" - ,vxi->vx_flags); + ,(unsigned long long)vxi->vx_flags); buffer += sprintf (buffer,"initpid: %d\n" ,vxi->vx_initpid); } else { @@ -322,8 +322,6 @@ int proc_pid_status(struct task_struct *task, char * buffer) *buffer++ = '\n'; buffer += sprintf (buffer,"ipv4root_bcast: %08x\n" ,nxi->v4_bcast); - buffer += sprintf (buffer,"ipv4root_refcnt: %d\n" - ,atomic_read(&nxi->nx_refcount)); } else { buffer += sprintf (buffer,"ipv4root: 0\n"); buffer += sprintf (buffer,"ipv4root_bcast: 0\n"); diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index eaddaf9ce..0e225be22 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -49,9 +49,9 @@ int reiserfs_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return -EFAULT; oldflags = REISERFS_I(inode) -> i_attrs; - if ( ( ( flags ^ oldflags) & - ( REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) && - !capable( CAP_LINUX_IMMUTABLE ) ) + if ( (oldflags & REISERFS_IMMUTABLE_FL) || ( ( (flags ^ oldflags) & + (REISERFS_IMMUTABLE_FL | REISERFS_IUNLINK_FL | REISERFS_APPEND_FL)) && + !capable( CAP_LINUX_IMMUTABLE ) ) ) return -EPERM; if( ( flags & REISERFS_NOTAIL_FL ) && diff --git a/include/linux/ninline.h b/include/linux/ninline.h index d3f752516..d07fc8438 100644 --- a/include/linux/ninline.h +++ b/include/linux/ninline.h @@ -5,6 +5,7 @@ // #define NX_DEBUG #include +#include #include #include "vserver/network.h" @@ -16,38 +17,38 @@ #endif -void free_nx_info(struct nx_info *); - extern int proc_pid_nx_info(struct task_struct *, char *); #define get_nx_info(i) __get_nx_info(i,__FILE__,__LINE__) -static __inline__ struct nx_info *__get_nx_info(struct nx_info *nxi, const char *_file, int _line) +static inline struct nx_info *__get_nx_info(struct nx_info *nxi, + const char *_file, int _line) { if (!nxi) return NULL; - nxdprintk("get_nx_info(%p[%d.%d])\t%s:%d\n", - nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + nxdprintk("get_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, _file, _line); - atomic_inc(&nxi->nx_refcount); + atomic_inc(&nxi->nx_usecnt); return nxi; } + +#define free_nx_info(nxi) \ + call_rcu(&nxi->nx_rcu, rcu_free_nx_info, nxi); + #define put_nx_info(i) __put_nx_info(i,__FILE__,__LINE__) -static __inline__ void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) +static inline void __put_nx_info(struct nx_info *nxi, const char *_file, int _line) { if (!nxi) return; - nxdprintk("put_nx_info(%p[%d.%d])\t%s:%d\n", - nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + nxdprintk("put_nx_info(%p[#%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_usecnt):0, _file, _line); - if (atomic_dec_and_lock(&nxi->nx_refcount, &nxlist_lock)) { - list_del(&nxi->nx_list); - spin_unlock(&nxlist_lock); + if (atomic_dec_and_test(&nxi->nx_usecnt)) free_nx_info(nxi); - } } @@ -59,9 +60,12 @@ static inline void __set_nx_info(struct nx_info **nxp, struct nx_info *nxi, BUG_ON(*nxp); if (!nxi) return; - nxdprintk("set_nx_info(%p[#%d.%d])\t%s:%d\n", - nxi, nxi?nxi->nx_id:0, nxi?atomic_read(&nxi->nx_refcount):0, + nxdprintk("set_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxi, nxi?nxi->nx_id:0, + nxi?atomic_read(&nxi->nx_usecnt):0, + nxi?atomic_read(&nxi->nx_refcnt):0, _file, _line); + atomic_inc(&nxi->nx_refcnt); *nxp = __get_nx_info(nxi, _file, _line); } @@ -74,11 +78,15 @@ static inline void __clr_nx_info(struct nx_info **nxp, if (!nxo) return; - nxdprintk("clr_nx_info(%p[#%d.%d])\t%s:%d\n", - nxo, nxo?nxo->nx_id:0, nxo?atomic_read(&nxo->nx_refcount):0, + nxdprintk("clr_nx_info(%p[#%d.%d.%d])\t%s:%d\n", + nxo, nxo?nxo->nx_id:0, + nxo?atomic_read(&nxo->nx_usecnt):0, + nxo?atomic_read(&nxo->nx_refcnt):0, _file, _line); *nxp = NULL; wmb(); + if (nxo && atomic_dec_and_test(&nxo->nx_refcnt)) + unhash_nx_info(nxo); __put_nx_info(nxo, _file, _line); } diff --git a/include/linux/vinline.h b/include/linux/vinline.h index 07bb3698a..ce2bf36c1 100644 --- a/include/linux/vinline.h +++ b/include/linux/vinline.h @@ -5,6 +5,7 @@ // #define VX_DEBUG #include +#include #include #include "vserver/context.h" @@ -19,39 +20,38 @@ -void free_vx_info(struct vx_info *); - extern int proc_pid_vx_info(struct task_struct *, char *); #define get_vx_info(i) __get_vx_info(i,__FILE__,__LINE__) -static __inline__ struct vx_info *__get_vx_info(struct vx_info *vxi, +static inline struct vx_info *__get_vx_info(struct vx_info *vxi, const char *_file, int _line) { if (!vxi) return NULL; vxdprintk("get_vx_info(%p[#%d.%d])\t%s:%d\n", - vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, _file, _line); - atomic_inc(&vxi->vx_refcount); + atomic_inc(&vxi->vx_usecnt); return vxi; } + +#define free_vx_info(vxi) \ + call_rcu(&vxi->vx_rcu, rcu_free_vx_info, vxi); + #define put_vx_info(i) __put_vx_info(i,__FILE__,__LINE__) -static __inline__ void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) +static inline void __put_vx_info(struct vx_info *vxi, const char *_file, int _line) { if (!vxi) return; vxdprintk("put_vx_info(%p[#%d.%d])\t%s:%d\n", - vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_usecnt):0, _file, _line); - if (atomic_dec_and_lock(&vxi->vx_refcount, &vxlist_lock)) { - list_del(&vxi->vx_list); - spin_unlock(&vxlist_lock); + if (atomic_dec_and_test(&vxi->vx_usecnt)) free_vx_info(vxi); - } } #define set_vx_info(p,i) __set_vx_info(p,i,__FILE__,__LINE__) @@ -62,9 +62,12 @@ static inline void __set_vx_info(struct vx_info **vxp, struct vx_info *vxi, BUG_ON(*vxp); if (!vxi) return; - vxdprintk("set_vx_info(%p[#%d.%d])\t%s:%d\n", - vxi, vxi?vxi->vx_id:0, vxi?atomic_read(&vxi->vx_refcount):0, + vxdprintk("set_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxi, vxi?vxi->vx_id:0, + vxi?atomic_read(&vxi->vx_usecnt):0, + vxi?atomic_read(&vxi->vx_refcnt):0, _file, _line); + atomic_inc(&vxi->vx_refcnt); *vxp = __get_vx_info(vxi, _file, _line); } @@ -75,11 +78,17 @@ static inline void __clr_vx_info(struct vx_info **vxp, { struct vx_info *vxo = *vxp; - vxdprintk("clr_vx_info(%p[#%d.%d])\t%s:%d\n", - vxo, vxo?vxo->vx_id:0, vxo?atomic_read(&vxo->vx_refcount):0, + if (!vxo) + return; + vxdprintk("clr_vx_info(%p[#%d.%d.%d])\t%s:%d\n", + vxo, vxo?vxo->vx_id:0, + vxo?atomic_read(&vxo->vx_usecnt):0, + vxo?atomic_read(&vxo->vx_refcnt):0, _file, _line); *vxp = NULL; wmb(); + if (vxo && atomic_dec_and_test(&vxo->vx_refcnt)) + unhash_vx_info(vxo); __put_vx_info(vxo, _file, _line); } diff --git a/include/linux/vserver/context.h b/include/linux/vserver/context.h index 76926038e..fffb3b90c 100644 --- a/include/linux/vserver/context.h +++ b/include/linux/vserver/context.h @@ -12,6 +12,7 @@ #include #include +#include #define _VX_INFO_DEF_ #include "cvirt.h" @@ -20,9 +21,11 @@ #undef _VX_INFO_DEF_ struct vx_info { - struct list_head vx_list; /* linked list of contexts */ + struct hlist_node vx_hlist; /* linked list of contexts */ + struct rcu_head vx_rcu; /* the rcu head */ xid_t vx_id; /* context id */ - atomic_t vx_refcount; /* refcount */ + atomic_t vx_usecnt; /* usage count */ + atomic_t vx_refcnt; /* reference count */ struct vx_info *vx_parent; /* parent context */ struct namespace *vx_namespace; /* private namespace */ @@ -42,10 +45,6 @@ struct vx_info { }; -extern spinlock_t vxlist_lock; -extern struct list_head vx_infos; - - #define VX_ADMIN 0x0001 #define VX_WATCH 0x0002 #define VX_DUMMY 0x0008 @@ -63,11 +62,14 @@ extern struct list_head vx_infos; #define VX_ATR_MASK 0x0F00 -void free_vx_info(struct vx_info *); +extern void rcu_free_vx_info(void *); +extern void unhash_vx_info(struct vx_info *); + +extern struct vx_info *locate_vx_info(int); +extern struct vx_info *locate_or_create_vx_info(int); -extern struct vx_info *find_vx_info(int); -extern struct vx_info *find_or_create_vx_info(int); -extern int vx_info_id_valid(int); +extern int get_xid_list(int, unsigned int *, int); +extern int vx_info_is_hashed(xid_t); extern int vx_migrate_task(struct task_struct *, struct vx_info *); diff --git a/include/linux/vserver/network.h b/include/linux/vserver/network.h index b3c39b062..086f566c4 100644 --- a/include/linux/vserver/network.h +++ b/include/linux/vserver/network.h @@ -3,7 +3,7 @@ #define MAX_N_CONTEXT 65535 /* Arbitrary limit */ -#define IP_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ +#define NX_DYNAMIC_ID ((uint32_t)-1) /* id for dynamic context */ #define NB_IPV4ROOT 16 @@ -12,14 +12,17 @@ #include #include #include +#include #include #include struct nx_info { - struct list_head nx_list; /* linked list of nxinfos */ + struct hlist_node nx_hlist; /* linked list of nxinfos */ + struct rcu_head nx_rcu; /* the rcu head */ nid_t nx_id; /* vnet id */ - atomic_t nx_refcount; + atomic_t nx_usecnt; /* usage count */ + atomic_t nx_refcnt; /* reference count */ uint64_t nx_flags; /* network flag word */ uint64_t nx_ncaps; /* network capabilities */ @@ -38,15 +41,16 @@ struct nx_info { }; -extern spinlock_t nxlist_lock; -extern struct list_head nx_infos; +extern void rcu_free_nx_info(void *); +extern void unhash_nx_info(struct nx_info *); +extern struct nx_info *locate_nx_info(int); +extern struct nx_info *locate_or_create_nx_info(int); -void free_nx_info(struct nx_info *); -struct nx_info *create_nx_info(void); +extern int get_nid_list(int, unsigned int *, int); +extern int nx_info_is_hashed(nid_t); -extern struct nx_info *find_nx_info(int); -extern int nx_info_id_valid(int); +extern int nx_migrate_task(struct task_struct *, struct nx_info *); struct in_ifaddr; struct net_device; diff --git a/include/linux/vserver/sched.h b/include/linux/vserver/sched.h index d1a206800..0f4469383 100644 --- a/include/linux/vserver/sched.h +++ b/include/linux/vserver/sched.h @@ -24,15 +24,15 @@ struct _vx_sched { static inline void vx_info_init_sched(struct _vx_sched *sched) { - /* scheduling; hard code starting values as constants */ - sched->fill_rate = 1; - sched->interval = 4; - sched->tokens_min = HZ >> 4; - sched->tokens_max = HZ >> 1; - sched->jiffies = jiffies; - sched->tokens_lock = SPIN_LOCK_UNLOCKED; - - atomic_set(&sched->tokens, HZ >> 2); + /* scheduling; hard code starting values as constants */ + sched->fill_rate = 1; + sched->interval = 4; + sched->tokens_min = HZ >> 4; + sched->tokens_max = HZ >> 1; + sched->jiffies = jiffies; + sched->tokens_lock = SPIN_LOCK_UNLOCKED; + + atomic_set(&sched->tokens, HZ >> 2); sched->cpus_allowed = CPU_MASK_ALL; } @@ -50,7 +50,7 @@ static inline int vx_info_proc_sched(struct _vx_sched *sched, char *buffer) "Interval:\t%8d\n" "TokensMin:\t%8d\n" "TokensMax:\t%8d\n" - ,sched->ticks + ,(unsigned long long)sched->ticks ,atomic_read(&sched->tokens) ,sched->fill_rate ,sched->interval @@ -119,9 +119,9 @@ static inline int vx_need_resched(struct task_struct *p) int tokens; p->time_slice--; - if (atomic_read(&vxi->vx_refcount) < 1) + if (atomic_read(&vxi->vx_usecnt) < 1) printk("need_resched: p=%p, s=%ld, ref=%d, id=%d/%d\n", - p, p->state, atomic_read(&vxi->vx_refcount), + p, p->state, atomic_read(&vxi->vx_usecnt), vxi->vx_id, p->xid); if ((tokens = vx_tokens_avail(vxi)) > 0) vx_consume_token(vxi); diff --git a/kernel/fork.c b/kernel/fork.c index 4336cf0ba..fa3f82802 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -77,6 +77,7 @@ static kmem_cache_t *task_struct_cachep; static void free_task(struct task_struct *tsk) { free_thread_info(tsk->thread_info); + vxdprintk("freeing up task %p\n", tsk); clr_vx_info(&tsk->vx_info); clr_nx_info(&tsk->nx_info); free_task_struct(tsk); @@ -869,7 +870,6 @@ struct task_struct *copy_process(unsigned long clone_flags, int retval; struct task_struct *p = NULL; struct vx_info *vxi; - struct nx_info *nxi; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -899,8 +899,10 @@ struct task_struct *copy_process(unsigned long clone_flags, if (!p) goto fork_out; - vxi = get_vx_info(current->vx_info); - nxi = get_nx_info(current->nx_info); + p->vx_info = NULL; + set_vx_info(&p->vx_info, current->vx_info); + p->nx_info = NULL; + set_nx_info(&p->nx_info, current->nx_info); /* check vserver memory */ if (p->mm && !(clone_flags & CLONE_VM)) { @@ -915,6 +917,7 @@ struct task_struct *copy_process(unsigned long clone_flags, } retval = -EAGAIN; + vxi = current->vx_info; if (vxi && (atomic_read(&vxi->limit.res[RLIMIT_NPROC]) >= vxi->limit.rlim[RLIMIT_NPROC])) goto bad_fork_free; diff --git a/kernel/vserver/context.c b/kernel/vserver/context.c index 538834c57..8b3cee7cc 100644 --- a/kernel/vserver/context.c +++ b/kernel/vserver/context.c @@ -12,6 +12,7 @@ * V0.05 rlimit basic implementation * V0.06 task_xid and info commands * V0.07 context flags and caps + * V0.08 switch to RCU based hash * */ @@ -22,38 +23,35 @@ #include #include #include +#include #include -/* system functions */ +/* __alloc_vx_info() + * allocate an initialized vx_info struct + * doesn't make it visible (hash) */ -LIST_HEAD(vx_infos); - -spinlock_t vxlist_lock - __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; - - -/* - * struct vx_info allocation and deallocation - */ - -static struct vx_info *alloc_vx_info(int id) +static struct vx_info *__alloc_vx_info(xid_t xid) { struct vx_info *new = NULL; - vxdprintk("alloc_vx_info(%d)\n", id); + vxdprintk("alloc_vx_info(%d)\n", xid); + /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct vx_info), GFP_KERNEL); if (!new) return 0; memset (new, 0, sizeof(struct vx_info)); - new->vx_id = id; - INIT_LIST_HEAD(&new->vx_list); + new->vx_id = xid; + INIT_RCU_HEAD(&new->vx_rcu); + INIT_HLIST_NODE(&new->vx_hlist); + atomic_set(&new->vx_refcnt, 0); + atomic_set(&new->vx_usecnt, 0); + /* rest of init goes here */ - vx_info_init_limit(&new->limit); vx_info_init_sched(&new->sched); vx_info_init_cvirt(&new->cvirt); @@ -63,13 +61,21 @@ static struct vx_info *alloc_vx_info(int id) new->vx_bcaps = CAP_INIT_EFF_SET; new->vx_ccaps = 0; - vxdprintk("alloc_vx_info(%d) = %p\n", id, new); + vxdprintk("alloc_vx_info(%d) = %p\n", xid, new); return new; } -void free_vx_info(struct vx_info *vxi) +/* __dealloc_vx_info() + + * final disposal of vx_info */ + +static void __dealloc_vx_info(struct vx_info *vxi) { - vxdprintk("free_vx_info(%p)\n", vxi); + vxdprintk("dealloc_vx_info(%p)\n", vxi); + + vxi->vx_hlist.next = LIST_POISON1; + vxi->vx_id = -1; + if (vxi->vx_namespace) put_namespace(vxi->vx_namespace); if (vxi->vx_fs) @@ -80,69 +86,85 @@ void free_vx_info(struct vx_info *vxi) vx_info_exit_cvirt(&vxi->cvirt); vx_info_exit_cacct(&vxi->cacct); - BUG_ON(atomic_read(&vxi->vx_refcount)); - vxi->vx_id = -1; + BUG_ON(atomic_read(&vxi->vx_usecnt)); + BUG_ON(atomic_read(&vxi->vx_refcnt)); kfree(vxi); } -/* - * struct vx_info search by id - * assumes vxlist_lock is held - */ +/* hash table for vx_info hash */ -static __inline__ struct vx_info *__find_vx_info(int id) -{ - struct vx_info *vxi; +#define VX_HASH_SIZE 13 - list_for_each_entry(vxi, &vx_infos, vx_list) - if (vxi->vx_id == id) - return vxi; - return 0; +struct hlist_head vx_info_hash[VX_HASH_SIZE]; + +static spinlock_t vx_info_hash_lock = SPIN_LOCK_UNLOCKED; + + +static inline unsigned int __hashval(xid_t xid) +{ + return (xid % VX_HASH_SIZE); } -/* - * struct vx_info ref stuff - */ -struct vx_info *find_vx_info(int id) +/* __hash_vx_info() + + * add the vxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_vx_info(struct vx_info *vxi) { - struct vx_info *vxi; + struct hlist_head *head; - if (id < 0) { - vxi = current->vx_info; - get_vx_info(vxi); - } else { - spin_lock(&vxlist_lock); - if ((vxi = __find_vx_info(id))) - get_vx_info(vxi); - spin_unlock(&vxlist_lock); - } - return vxi; + vxdprintk("__hash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + get_vx_info(vxi); + head = &vx_info_hash[__hashval(vxi->vx_id)]; + hlist_add_head_rcu(&vxi->vx_hlist, head); } -/* - * verify that id is a valid xid - */ +/* __unhash_vx_info() + + * remove the vxi from the global hash table + * requires the hash_lock to be held */ -int vx_info_id_valid(int id) +static inline void __unhash_vx_info(struct vx_info *vxi) { - int valid; + vxdprintk("__unhash_vx_info: %p[#%d]\n", vxi, vxi->vx_id); + hlist_del_rcu(&vxi->vx_hlist); + put_vx_info(vxi); +} + - spin_lock(&vxlist_lock); - valid = (__find_vx_info(id) != NULL); - spin_unlock(&vxlist_lock); - return valid; +/* __lookup_vx_info() + + * requires the rcu_read_lock() + * doesn't increment the vx_refcnt */ + +static inline struct vx_info *__lookup_vx_info(xid_t xid) +{ + struct hlist_head *head = &vx_info_hash[__hashval(xid)]; + struct hlist_node *pos; + + hlist_for_each(pos, head) { + struct vx_info *vxi = + hlist_entry(pos, struct vx_info, vx_hlist); + + if (vxi->vx_id == xid) { + return vxi; + } + } + return NULL; } -/* - * dynamic context id ... - */ +/* __vx_dynamic_id() -static __inline__ xid_t __vx_dynamic_id(void) + * find unused dynamic xid + * requires the hash_lock to be held */ + +static inline xid_t __vx_dynamic_id(void) { static xid_t seq = MAX_S_CONTEXT; xid_t barrier = seq; @@ -150,29 +172,29 @@ static __inline__ xid_t __vx_dynamic_id(void) do { if (++seq > MAX_S_CONTEXT) seq = MIN_D_CONTEXT; - if (!__find_vx_info(seq)) + if (!__lookup_vx_info(seq)) return seq; } while (barrier != seq); return 0; } -static struct vx_info * __foc_vx_info(int id, int *err) +/* __loc_vx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct vx_info * __loc_vx_info(int id, int *err) { struct vx_info *new, *vxi = NULL; - vxdprintk("foc_vx_info(%d)\n", id); - if (!(new = alloc_vx_info(id))) { - *err = -ENOMEM; - return NULL; - } + vxdprintk("loc_vx_info(%d)\n", id); - /* dirty hack until Spectator becomes a cap */ - if (id == 0 || id == 1) { - *err = -EBUSY; + if (!(new = __alloc_vx_info(id))) { + *err = -ENOMEM; return NULL; } - spin_lock(&vxlist_lock); + spin_lock(&vx_info_hash_lock); /* dynamic context requested */ if (id == VX_DYNAMIC_ID) { @@ -184,14 +206,14 @@ static struct vx_info * __foc_vx_info(int id, int *err) new->vx_id = id; } /* existing context requested */ - else if ((vxi = __find_vx_info(id))) { + else if ((vxi = __lookup_vx_info(id))) { /* context in setup is not available */ if (vxi->vx_flags & VXF_STATE_SETUP) { - vxdprintk("foc_vx_info(%d) = %p (not available)\n", id, vxi); + vxdprintk("loc_vx_info(%d) = %p (not available)\n", id, vxi); vxi = NULL; *err = -EBUSY; } else { - vxdprintk("foc_vx_info(%d) = %p (found)\n", id, vxi); + vxdprintk("loc_vx_info(%d) = %p (found)\n", id, vxi); get_vx_info(vxi); *err = 0; } @@ -199,27 +221,131 @@ static struct vx_info * __foc_vx_info(int id, int *err) } /* new context requested */ - vxdprintk("foc_vx_info(%d) = %p (new)\n", id, new); - atomic_set(&new->vx_refcount, 1); - list_add(&new->vx_list, &vx_infos); + vxdprintk("loc_vx_info(%d) = %p (new)\n", id, new); + __hash_vx_info(get_vx_info(new)); vxi = new, new = NULL; *err = 1; out_unlock: - spin_unlock(&vxlist_lock); + spin_unlock(&vx_info_hash_lock); if (new) - free_vx_info(new); + __dealloc_vx_info(new); + return vxi; +} + + + +/* exported stuff */ + + + +void rcu_free_vx_info(void *obj) +{ + struct vx_info *vxi = obj; + int usecnt, refcnt; + + usecnt = atomic_read(&vxi->vx_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&vxi->vx_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_vx_info(vxi); + else + printk("!!! rcu didn't free\n"); +} + +void unhash_vx_info(struct vx_info *vxi) +{ + spin_lock(&vx_info_hash_lock); + __unhash_vx_info(vxi); + spin_unlock(&vx_info_hash_lock); +} + +/* locate_vx_info() + + * search for a vx_info and get() it + * negative id means current */ + +struct vx_info *locate_vx_info(int id) +{ + struct vx_info *vxi; + + if (id < 0) { + vxi = get_vx_info(current->vx_info); + } else { + rcu_read_lock(); + vxi = get_vx_info(__lookup_vx_info(id)); + rcu_read_unlock(); + } return vxi; } +/* vx_info_is_hashed() + + * verify that xid is still hashed */ + +int vx_info_is_hashed(xid_t xid) +{ + int hashed; + + rcu_read_lock(); + hashed = (__lookup_vx_info(xid) != NULL); + rcu_read_unlock(); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY -struct vx_info *find_or_create_vx_info(int id) +#if 0 +struct vx_info *alloc_vx_info(xid_t xid) +{ + return __alloc_vx_info(xid); +} +#endif + +struct vx_info *locate_or_create_vx_info(int id) { int err; - return __foc_vx_info(id, &err); + return __loc_vx_info(id, &err); } +#endif + +#ifdef CONFIG_PROC_FS + +#define hlist_for_each_rcu(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ + pos = pos->next, ({ smp_read_barrier_depends(); 0;})) + +int get_xid_list(int index, unsigned int *xids, int size) +{ + int hindex, nr_xids = 0; + + rcu_read_lock(); + for (hindex = 0; hindex < VX_HASH_SIZE; hindex++) { + struct hlist_head *head = &vx_info_hash[hindex]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct vx_info *vxi; + + if (--index > 0) + continue; + + vxi = hlist_entry(pos, struct vx_info, vx_hlist); + xids[nr_xids] = vxi->vx_id; + if (++nr_xids >= size) + goto out; + } + } +out: + rcu_read_unlock(); + return nr_xids; +} +#endif int vx_migrate_user(struct task_struct *p, struct vx_info *vxi) { @@ -300,17 +426,19 @@ static inline int vx_openfd_task(struct task_struct *tsk) int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) { - struct vx_info *old_vxi = task_get_vx_info(p); + struct vx_info *old_vxi; int ret = 0; if (!p || !vxi) BUG(); - vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, - vxi->vx_id, atomic_read(&vxi->vx_refcount)); + old_vxi = task_get_vx_info(p); if (old_vxi == vxi) goto out; + vxdprintk("vx_migrate_task(%p,%p[#%d.%d)\n", p, vxi, + vxi->vx_id, atomic_read(&vxi->vx_usecnt)); + if (!(ret = vx_migrate_user(p, vxi))) { task_lock(p); if (old_vxi) { @@ -321,6 +449,9 @@ int vx_migrate_task(struct task_struct *p, struct vx_info *vxi) atomic_inc(&vxi->limit.res[RLIMIT_NPROC]); atomic_add(vx_nofiles_task(p), &vxi->limit.res[RLIMIT_NOFILE]); atomic_add(vx_openfd_task(p), &vxi->limit.res[RLIMIT_OPENFD]); + /* should be handled in set_vx_info !! */ + if (old_vxi) + clr_vx_info(&p->vx_info); set_vx_info(&p->vx_info, vxi); p->xid = vxi->vx_id; vx_mask_bcaps(p); @@ -383,7 +514,7 @@ int vc_vx_info(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -401,7 +532,6 @@ int vc_vx_info(uint32_t id, void __user *data) int vc_ctx_create(uint32_t xid, void __user *data) { - // int ret = -ENOMEM; struct vx_info *new_vxi; int ret; @@ -414,7 +544,7 @@ int vc_ctx_create(uint32_t xid, void __user *data) if (xid < 1) return -EINVAL; - new_vxi = __foc_vx_info(xid, &ret); + new_vxi = __loc_vx_info(xid, &ret); if (!new_vxi) return ret; if (!(new_vxi->vx_flags & VXF_STATE_SETUP)) { @@ -424,6 +554,7 @@ int vc_ctx_create(uint32_t xid, void __user *data) ret = new_vxi->vx_id; vx_migrate_task(current, new_vxi); + /* if this fails, we might end up with a hashed vx_info */ out_put: put_vx_info(new_vxi); return ret; @@ -443,7 +574,7 @@ int vc_ctx_migrate(uint32_t id, void __user *data) return 0; } - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; vx_migrate_task(current, vxi); @@ -460,13 +591,12 @@ int vc_get_cflags(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; vc_data.flagword = vxi->vx_flags; - // vc_data.mask = ~0UL; /* special STATE flag handling */ vc_data.mask = vx_mask_flags(~0UL, vxi->vx_flags, VXF_ONE_TIME); @@ -488,7 +618,7 @@ int vc_set_cflags(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -516,7 +646,7 @@ int vc_get_ccaps(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -540,7 +670,7 @@ int vc_set_ccaps(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -553,6 +683,6 @@ int vc_set_ccaps(uint32_t id, void __user *data) #include -EXPORT_SYMBOL_GPL(free_vx_info); -EXPORT_SYMBOL_GPL(vxlist_lock); +EXPORT_SYMBOL_GPL(rcu_free_vx_info); +EXPORT_SYMBOL_GPL(vx_info_hash_lock); diff --git a/kernel/vserver/init.c b/kernel/vserver/init.c index 8afd1fc64..4a20f268c 100644 --- a/kernel/vserver/init.c +++ b/kernel/vserver/init.c @@ -12,7 +12,6 @@ #include #include #include -// #include #include #include diff --git a/kernel/vserver/legacy.c b/kernel/vserver/legacy.c index a620ae3b5..8fe6fed6d 100644 --- a/kernel/vserver/legacy.c +++ b/kernel/vserver/legacy.c @@ -74,9 +74,9 @@ int vc_new_s_context(uint32_t ctx, void __user *data) return -EINVAL; if ((ctx == VX_DYNAMIC_ID) || (ctx < MIN_D_CONTEXT)) - new_vxi = find_or_create_vx_info(ctx); + new_vxi = locate_or_create_vx_info(ctx); else - new_vxi = find_vx_info(ctx); + new_vxi = locate_vx_info(ctx); if (!new_vxi) return -EINVAL; @@ -102,6 +102,7 @@ int vc_new_s_context(uint32_t ctx, void __user *data) } +extern struct nx_info *create_nx_info(void); /* set ipv4 root (syscall) */ @@ -152,9 +153,15 @@ int vc_set_ipv4root(uint32_t nbip, void __user *data) new_nxi->mask[i] = vc_data.nx_mask_pair[i].mask; } new_nxi->v4_bcast = vc_data.broadcast; - current->nx_info = new_nxi; - current->nid = new_nxi->nx_id; - put_nx_info(nxi); + // current->nx_info = new_nxi; + if (nxi) { + printk("!!! switching nx_info %p->%p\n", nxi, new_nxi); + clr_nx_info(¤t->nx_info); + } + nx_migrate_task(current, new_nxi); + // set_nx_info(¤t->nx_info, new_nxi); + // current->nid = new_nxi->nx_id; + put_nx_info(new_nxi); return 0; } diff --git a/kernel/vserver/limit.c b/kernel/vserver/limit.c index 5bd2fdcb9..668918c78 100644 --- a/kernel/vserver/limit.c +++ b/kernel/vserver/limit.c @@ -55,7 +55,7 @@ int vc_get_rlimit(uint32_t id, void __user *data) if (!is_valid_rlimit(vc_data.id)) return -ENOTSUPP; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -81,7 +81,7 @@ int vc_set_rlimit(uint32_t id, void __user *data) if (!is_valid_rlimit(vc_data.id)) return -ENOTSUPP; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; diff --git a/kernel/vserver/namespace.c b/kernel/vserver/namespace.c index 2c76c6fb4..f1c95c49e 100644 --- a/kernel/vserver/namespace.c +++ b/kernel/vserver/namespace.c @@ -57,7 +57,7 @@ int vc_set_vhi_name(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -77,7 +77,7 @@ int vc_get_vhi_name(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -126,7 +126,7 @@ int vc_enter_namespace(uint32_t id, void *data) if (!vx_check(0, VX_ADMIN)) return -ENOSYS; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH; @@ -158,11 +158,9 @@ out_put: int vc_cleanup_namespace(uint32_t id, void *data) { down_write(¤t->namespace->sem); - // spin_lock(&dcache_lock); spin_lock(&vfsmount_lock); umount_unused(current->namespace->root, current->fs); spin_unlock(&vfsmount_lock); - // spin_unlock(&dcache_lock); up_write(¤t->namespace->sem); return 0; } diff --git a/kernel/vserver/network.c b/kernel/vserver/network.c index 479a19b47..b37b0acb4 100644 --- a/kernel/vserver/network.c +++ b/kernel/vserver/network.c @@ -8,6 +8,7 @@ * V0.01 broken out from vcontext V0.05 * V0.02 cleaned up implementation * V0.03 added equiv nx commands + * V0.04 switch to RCU based hash * */ @@ -15,120 +16,130 @@ #include #include #include +#include #include -LIST_HEAD(nx_infos); +/* __alloc_nx_info() -spinlock_t nxlist_lock - __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; + * allocate an initialized nx_info struct + * doesn't make it visible (hash) */ - -/* - * struct nx_info allocation and deallocation - */ - -static struct nx_info *alloc_nx_info(void) +static struct nx_info *__alloc_nx_info(nid_t nid) { struct nx_info *new = NULL; nxdprintk("alloc_nx_info()\n"); + /* would this benefit from a slab cache? */ new = kmalloc(sizeof(struct nx_info), GFP_KERNEL); if (!new) return 0; memset (new, 0, sizeof(struct nx_info)); + new->nx_id = nid; + INIT_RCU_HEAD(&new->nx_rcu); + INIT_HLIST_NODE(&new->nx_hlist); + atomic_set(&new->nx_refcnt, 0); + atomic_set(&new->nx_usecnt, 0); + /* rest of init goes here */ nxdprintk("alloc_nx_info() = %p\n", new); return new; } -void free_nx_info(struct nx_info *nxi) +/* __dealloc_nx_info() + + * final disposal of nx_info */ + +static void __dealloc_nx_info(struct nx_info *nxi) { - nxdprintk("free_nx_info(%p)\n", nxi); + nxdprintk("dealloc_nx_info(%p)\n", nxi); + + nxi->nx_hlist.next = LIST_POISON1; + nxi->nx_id = -1; + + BUG_ON(atomic_read(&nxi->nx_usecnt)); + BUG_ON(atomic_read(&nxi->nx_refcnt)); + kfree(nxi); } -struct nx_info *create_nx_info(void) -{ - struct nx_info *new; - static int gnid = 1; - - nxdprintk("create_nx_info()\n"); - if (!(new = alloc_nx_info())) - return 0; - spin_lock(&nxlist_lock); +/* hash table for nx_info hash */ - /* new ip info */ - atomic_set(&new->nx_refcount, 1); - new->nx_id = gnid++; - list_add(&new->nx_list, &nx_infos); +#define NX_HASH_SIZE 13 - spin_unlock(&nxlist_lock); - return new; -} +struct hlist_head nx_info_hash[NX_HASH_SIZE]; +static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED; -/* - * struct nx_info search by id - * assumes nxlist_lock is held - */ -static __inline__ struct nx_info *__find_nx_info(int id) +static inline unsigned int __hashval(nid_t nid) { - struct nx_info *nxi; - - list_for_each_entry(nxi, &nx_infos, nx_list) - if (nxi->nx_id == id) - return nxi; - return 0; + return (nid % NX_HASH_SIZE); } -/* - * struct nx_info ref stuff - */ -struct nx_info *find_nx_info(int id) +/* __hash_nx_info() + + * add the nxi to the global hash table + * requires the hash_lock to be held */ + +static inline void __hash_nx_info(struct nx_info *nxi) { - struct nx_info *nxi; + struct hlist_head *head; - if (id < 0) { - nxi = current->nx_info; - get_nx_info(nxi); - } else { - spin_lock(&nxlist_lock); - if ((nxi = __find_nx_info(id))) - get_nx_info(nxi); - spin_unlock(&nxlist_lock); - } - return nxi; + nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id); + get_nx_info(nxi); + head = &nx_info_hash[__hashval(nxi->nx_id)]; + hlist_add_head_rcu(&nxi->nx_hlist, head); } -/* - * verify that id is a valid nid - */ +/* __unhash_nx_info() + + * remove the nxi from the global hash table + * requires the hash_lock to be held */ -int nx_info_id_valid(int id) +static inline void __unhash_nx_info(struct nx_info *nxi) { - int valid; - - spin_lock(&nxlist_lock); - valid = (__find_nx_info(id) != NULL); - spin_unlock(&nxlist_lock); - return valid; + nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id); + hlist_del_rcu(&nxi->nx_hlist); + put_nx_info(nxi); } -/* - * dynamic context id ... - */ +/* __lookup_nx_info() -static __inline__ nid_t __nx_dynamic_id(void) + * requires the rcu_read_lock() + * doesn't increment the nx_refcnt */ + +static inline struct nx_info *__lookup_nx_info(nid_t nid) +{ + struct hlist_head *head = &nx_info_hash[__hashval(nid)]; + struct hlist_node *pos; + + hlist_for_each(pos, head) { + struct nx_info *nxi = + hlist_entry(pos, struct nx_info, nx_hlist); + + if (nxi->nx_id == nid) { + return nxi; + } + } + return NULL; +} + + +/* __nx_dynamic_id() + + * find unused dynamic nid + * requires the hash_lock to be held */ + +static inline nid_t __nx_dynamic_id(void) { static nid_t seq = MAX_N_CONTEXT; nid_t barrier = seq; @@ -136,27 +147,32 @@ static __inline__ nid_t __nx_dynamic_id(void) do { if (++seq > MAX_N_CONTEXT) seq = MIN_D_CONTEXT; - if (!__find_nx_info(seq)) + if (!__lookup_nx_info(seq)) return seq; } while (barrier != seq); return 0; } -static struct nx_info * __foc_nx_info(int id, int *err) +/* __loc_nx_info() + + * locate or create the requested context + * get() it and if new hash it */ + +static struct nx_info * __loc_nx_info(int id, int *err) { struct nx_info *new, *nxi = NULL; - nxdprintk("foc_nx_info(%d)\n", id); - // if (!(new = alloc_nx_info(id))) { - if (!(new = alloc_nx_info())) { + nxdprintk("loc_nx_info(%d)\n", id); + + if (!(new = __alloc_nx_info(id))) { *err = -ENOMEM; return NULL; } - spin_lock(&nxlist_lock); + spin_lock(&nx_info_hash_lock); /* dynamic context requested */ - if (id == IP_DYNAMIC_ID) { + if (id == NX_DYNAMIC_ID) { id = __nx_dynamic_id(); if (!id) { printk(KERN_ERR "no dynamic context available.\n"); @@ -165,14 +181,14 @@ static struct nx_info * __foc_nx_info(int id, int *err) new->nx_id = id; } /* existing context requested */ - else if ((nxi = __find_nx_info(id))) { + else if ((nxi = __lookup_nx_info(id))) { /* context in setup is not available */ if (nxi->nx_flags & VXF_STATE_SETUP) { - nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi); + nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi); nxi = NULL; *err = -EBUSY; } else { - nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi); + nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi); get_nx_info(nxi); *err = 0; } @@ -180,27 +196,139 @@ static struct nx_info * __foc_nx_info(int id, int *err) } /* new context requested */ - nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new); - atomic_set(&new->nx_refcount, 1); - list_add(&new->nx_list, &nx_infos); + nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new); + __hash_nx_info(get_nx_info(new)); nxi = new, new = NULL; *err = 1; out_unlock: - spin_unlock(&nxlist_lock); + spin_unlock(&nx_info_hash_lock); if (new) - free_nx_info(new); + __dealloc_nx_info(new); + return nxi; +} + + + +/* exported stuff */ + + + + +void rcu_free_nx_info(void *obj) +{ + struct nx_info *nxi = obj; + int usecnt, refcnt; + + usecnt = atomic_read(&nxi->nx_usecnt); + BUG_ON(usecnt < 0); + + refcnt = atomic_read(&nxi->nx_refcnt); + BUG_ON(refcnt < 0); + + if (!usecnt) + __dealloc_nx_info(nxi); + else + printk("!!! rcu didn't free\n"); +} + +void unhash_nx_info(struct nx_info *nxi) +{ + spin_lock(&nx_info_hash_lock); + __unhash_nx_info(nxi); + spin_unlock(&nx_info_hash_lock); +} + +/* locate_nx_info() + + * search for a nx_info and get() it + * negative id means current */ + +struct nx_info *locate_nx_info(int id) +{ + struct nx_info *nxi; + + if (id < 0) { + nxi = get_nx_info(current->nx_info); + } else { + rcu_read_lock(); + nxi = get_nx_info(__lookup_nx_info(id)); + rcu_read_unlock(); + } return nxi; } +/* nx_info_is_hashed() + + * verify that nid is still hashed */ + +int nx_info_is_hashed(nid_t nid) +{ + int hashed; + + rcu_read_lock(); + hashed = (__lookup_nx_info(nid) != NULL); + rcu_read_unlock(); + return hashed; +} + +#ifdef CONFIG_VSERVER_LEGACY -struct nx_info *find_or_create_nx_info(int id) +struct nx_info *locate_or_create_nx_info(int id) { int err; - return __foc_nx_info(id, &err); + return __loc_nx_info(id, &err); } +struct nx_info *create_nx_info(void) +{ + struct nx_info *new; + int err; + + nxdprintk("create_nx_info()\n"); + if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err))) + return NULL; + return new; +} + + +#endif + +#ifdef CONFIG_PROC_FS + +#define hlist_for_each_rcu(pos, head) \ + for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \ + pos = pos->next, ({ smp_read_barrier_depends(); 0;})) + +int get_nid_list(int index, unsigned int *nids, int size) +{ + int hindex, nr_nids = 0; + + rcu_read_lock(); + for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) { + struct hlist_head *head = &nx_info_hash[hindex]; + struct hlist_node *pos; + + hlist_for_each_rcu(pos, head) { + struct nx_info *nxi; + + if (--index > 0) + continue; + + nxi = hlist_entry(pos, struct nx_info, nx_hlist); + nids[nr_nids] = nxi->nx_id; + if (++nr_nids >= size) + goto out; + } + } +out: + rcu_read_unlock(); + return nr_nids; +} +#endif + + /* * migrate task to new network */ @@ -213,17 +341,22 @@ int nx_migrate_task(struct task_struct *p, struct nx_info *nxi) if (!p || !nxi) BUG(); - nxdprintk("nx_migrate_task(%p,%p[#%d.%d)\n", p, nxi, - nxi->nx_id, atomic_read(&nxi->nx_refcount)); + nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n", + p, nxi, nxi->nx_id, + atomic_read(&nxi->nx_usecnt), + atomic_read(&nxi->nx_refcnt)); if (old_nxi == nxi) goto out; task_lock(p); + /* should be handled in set_nx_info !! */ + if (old_nxi) + clr_nx_info(&p->nx_info); set_nx_info(&p->nx_info, nxi); p->nid = nxi->nx_id; task_unlock(p); - put_nx_info(old_nxi); + // put_nx_info(old_nxi); out: put_nx_info(old_nxi); return ret; @@ -246,10 +379,9 @@ static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi) int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi) { - if (!nxi) - return 1; - - return __addr_in_nx_info(ifa->ifa_address, nxi); + if (nxi && ifa) + return __addr_in_nx_info(ifa->ifa_address, nxi); + return 1; } int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi) @@ -312,7 +444,7 @@ int vc_nx_info(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -342,7 +474,7 @@ int vc_net_create(uint32_t nid, void __user *data) if (nid < 1) return -EINVAL; - new_nxi = __foc_nx_info(nid, &ret); + new_nxi = __loc_nx_info(nid, &ret); if (!new_nxi) return ret; if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) { @@ -365,7 +497,7 @@ int vc_net_migrate(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; nx_migrate_task(current, nxi); @@ -383,7 +515,7 @@ int vc_net_add(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -402,7 +534,7 @@ int vc_net_remove(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -421,13 +553,12 @@ int vc_get_nflags(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; vc_data.flagword = nxi->nx_flags; - // vc_data.mask = ~0UL; /* special STATE flag handling */ vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME); @@ -449,7 +580,7 @@ int vc_set_nflags(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -472,7 +603,7 @@ int vc_get_ncaps(uint32_t id, void __user *data) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -495,7 +626,7 @@ int vc_set_ncaps(uint32_t id, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - nxi = find_nx_info(id); + nxi = locate_nx_info(id); if (!nxi) return -ESRCH; @@ -508,6 +639,6 @@ int vc_set_ncaps(uint32_t id, void __user *data) #include -EXPORT_SYMBOL_GPL(free_nx_info); -EXPORT_SYMBOL_GPL(nxlist_lock); +EXPORT_SYMBOL_GPL(rcu_free_nx_info); +EXPORT_SYMBOL_GPL(nx_info_hash_lock); diff --git a/kernel/vserver/proc.c b/kernel/vserver/proc.c index 42bc18200..3c98a5314 100644 --- a/kernel/vserver/proc.c +++ b/kernel/vserver/proc.c @@ -66,7 +66,7 @@ int proc_xid_info (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = sprintf(buffer, @@ -86,19 +86,21 @@ int proc_xid_status (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = sprintf(buffer, - "RefC:\t%d\n" + "UseCnt:\t%d\n" + "RefCnt:\t%d\n" "Flags:\t%016llx\n" "BCaps:\t%016llx\n" "CCaps:\t%016llx\n" "Ticks:\t%d\n" - ,atomic_read(&vxi->vx_refcount) - ,vxi->vx_flags - ,vxi->vx_bcaps - ,vxi->vx_ccaps + ,atomic_read(&vxi->vx_usecnt) + ,atomic_read(&vxi->vx_refcnt) + ,(unsigned long long)vxi->vx_flags + ,(unsigned long long)vxi->vx_bcaps + ,(unsigned long long)vxi->vx_ccaps ,atomic_read(&vxi->limit.ticks) ); put_vx_info(vxi); @@ -110,7 +112,7 @@ int proc_xid_limit (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_limit(&vxi->limit, buffer); @@ -123,7 +125,7 @@ int proc_xid_sched (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_sched(&vxi->sched, buffer); @@ -136,7 +138,7 @@ int proc_xid_cvirt (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_cvirt(&vxi->cvirt, buffer); @@ -149,7 +151,7 @@ int proc_xid_cacct (int vid, char *buffer) struct vx_info *vxi; int length; - vxi = find_vx_info(vid); + vxi = locate_vx_info(vid); if (!vxi) return 0; length = vx_info_proc_cacct(&vxi->cacct, buffer); @@ -178,7 +180,7 @@ int proc_nid_info (int vid, char *buffer) struct nx_info *nxi; int length, i; - nxi = find_nx_info(vid); + nxi = locate_nx_info(vid); if (!nxi) return 0; length = sprintf(buffer, @@ -202,12 +204,14 @@ int proc_nid_status (int vid, char *buffer) struct nx_info *nxi; int length; - nxi = find_nx_info(vid); + nxi = locate_nx_info(vid); if (!nxi) return 0; length = sprintf(buffer, - "RefC:\t%d\n" - ,atomic_read(&nxi->nx_refcount) + "UseCnt:\t%d\n" + "RefCnt:\t%d\n" + ,atomic_read(&nxi->nx_usecnt) + ,atomic_read(&nxi->nx_refcnt) ); put_nx_info(nxi); return length; @@ -247,18 +251,18 @@ out: static int proc_vid_revalidate(struct dentry * dentry, struct nameidata *nd) { struct inode * inode = dentry->d_inode; - int vid, valid=0; + int vid, hashed=0; vid = inode_vid(inode); switch (inode_type(inode) & PROC_VID_MASK) { case PROC_XID_INO: - valid = vx_info_id_valid(vid); + hashed = vx_info_is_hashed(vid); break; case PROC_NID_INO: - valid = nx_info_id_valid(vid); + hashed = nx_info_is_hashed(vid); break; } - if (valid) + if (hashed) return 1; d_drop(dentry); return 0; @@ -564,7 +568,7 @@ struct dentry *proc_virtual_lookup(struct inode *dir, xid = atovid(name, len); if (xid < 0) goto out; - vxi = find_vx_info(xid); + vxi = locate_vx_info(xid); if (!vxi) goto out; @@ -634,7 +638,7 @@ struct dentry *proc_vnet_lookup(struct inode *dir, nid = atovid(name, len); if (nid < 0) goto out; - nxi = find_nx_info(nid); + nxi = locate_nx_info(nid); if (!nxi) goto out; @@ -667,27 +671,6 @@ out: #define PROC_NUMBUF 10 #define PROC_MAXVIDS 32 - -static int get_xid_list(int index, unsigned int *xids) -{ - struct vx_info *p; - int nr_xids = 0; - - index--; - spin_lock(&vxlist_lock); - list_for_each_entry(p, &vx_infos, vx_list) { - int xid = p->vx_id; - - if (--index >= 0) - continue; - xids[nr_xids] = xid; - if (++nr_xids >= PROC_MAXVIDS) - break; - } - spin_unlock(&vxlist_lock); - return nr_xids; -} - int proc_virtual_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -729,12 +712,11 @@ int proc_virtual_readdir(struct file * filp, filp->f_pos++; } - nr_xids = get_xid_list(nr, xid_array); - + nr_xids = get_xid_list(nr, xid_array, PROC_MAXVIDS); for (i = 0; i < nr_xids; i++) { int xid = xid_array[i]; ino_t ino = fake_ino(xid, PROC_XID_INO); - unsigned long j = PROC_NUMBUF; + unsigned int j = PROC_NUMBUF; do buf[--j] = '0' + (xid % 10); while (xid/=10); @@ -757,27 +739,6 @@ static struct inode_operations proc_virtual_dir_inode_operations = { }; - -static int get_nid_list(int index, unsigned int *nids) -{ - struct nx_info *p; - int nr_nids = 0; - - index--; - spin_lock(&nxlist_lock); - list_for_each_entry(p, &nx_infos, nx_list) { - int nid = p->nx_id; - - if (--index >= 0) - continue; - nids[nr_nids] = nid; - if (++nr_nids >= PROC_MAXVIDS) - break; - } - spin_unlock(&nxlist_lock); - return nr_nids; -} - int proc_vnet_readdir(struct file * filp, void * dirent, filldir_t filldir) { @@ -819,8 +780,7 @@ int proc_vnet_readdir(struct file * filp, filp->f_pos++; } - nr_nids = get_nid_list(nr, nid_array); - + nr_nids = get_nid_list(nr, nid_array, PROC_MAXVIDS); for (i = 0; i < nr_nids; i++) { int nid = nid_array[i]; ino_t ino = fake_ino(nid, PROC_NID_INO); diff --git a/kernel/vserver/sched.c b/kernel/vserver/sched.c index a75195a19..9284bf47f 100644 --- a/kernel/vserver/sched.c +++ b/kernel/vserver/sched.c @@ -126,7 +126,7 @@ int vc_set_sched(uint32_t xid, void __user *data) if (copy_from_user (&vc_data, data, sizeof(vc_data))) return -EFAULT; - vxi = find_vx_info(xid); + vxi = locate_vx_info(xid); if (!vxi) return -EINVAL; diff --git a/kernel/vserver/signal.c b/kernel/vserver/signal.c index 464ea1be4..200eba83a 100644 --- a/kernel/vserver/signal.c +++ b/kernel/vserver/signal.c @@ -38,7 +38,7 @@ int vc_ctx_kill(uint32_t id, void __user *data) info.si_pid = current->pid; info.si_uid = current->uid; - vxi = find_vx_info(id); + vxi = locate_vx_info(id); if (!vxi) return -ESRCH;