* V0.01 broken out from vcontext V0.05
* V0.02 cleaned up implementation
* V0.03 added equiv nx commands
+ * V0.04 switch to RCU based hash
*
*/
#include <linux/slab.h>
#include <linux/vserver/network.h>
#include <linux/ninline.h>
+#include <linux/rcupdate.h>
#include <asm/errno.h>
-LIST_HEAD(nx_infos);
+/* __alloc_nx_info()
-spinlock_t nxlist_lock
- __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+ * allocate an initialized nx_info struct
+ * doesn't make it visible (hash) */
-
-/*
- * struct nx_info allocation and deallocation
- */
-
-static struct nx_info *alloc_nx_info(void)
+static struct nx_info *__alloc_nx_info(nid_t nid)
{
struct nx_info *new = NULL;
nxdprintk("alloc_nx_info()\n");
+
/* would this benefit from a slab cache? */
new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
if (!new)
return 0;
memset (new, 0, sizeof(struct nx_info));
+ new->nx_id = nid;
+ INIT_RCU_HEAD(&new->nx_rcu);
+ INIT_HLIST_NODE(&new->nx_hlist);
+ atomic_set(&new->nx_refcnt, 0);
+ atomic_set(&new->nx_usecnt, 0);
+
/* rest of init goes here */
nxdprintk("alloc_nx_info() = %p\n", new);
return new;
}
-void free_nx_info(struct nx_info *nxi)
+/* __dealloc_nx_info()
+
+ * final disposal of nx_info */
+
+static void __dealloc_nx_info(struct nx_info *nxi)
{
- nxdprintk("free_nx_info(%p)\n", nxi);
+ nxdprintk("dealloc_nx_info(%p)\n", nxi);
+
+ nxi->nx_hlist.next = LIST_POISON1;
+ nxi->nx_id = -1;
+
+ BUG_ON(atomic_read(&nxi->nx_usecnt));
+ BUG_ON(atomic_read(&nxi->nx_refcnt));
+
kfree(nxi);
}
-struct nx_info *create_nx_info(void)
-{
- struct nx_info *new;
- static int gnid = 1;
-
- nxdprintk("create_nx_info()\n");
- if (!(new = alloc_nx_info()))
- return 0;
- spin_lock(&nxlist_lock);
+/* hash table for nx_info hash */
- /* new ip info */
- atomic_set(&new->nx_refcount, 1);
- new->nx_id = gnid++;
- list_add(&new->nx_list, &nx_infos);
+#define NX_HASH_SIZE 13
- spin_unlock(&nxlist_lock);
- return new;
-}
+struct hlist_head nx_info_hash[NX_HASH_SIZE];
+static spinlock_t nx_info_hash_lock = SPIN_LOCK_UNLOCKED;
-/*
- * struct nx_info search by id
- * assumes nxlist_lock is held
- */
-static __inline__ struct nx_info *__find_nx_info(int id)
+static inline unsigned int __hashval(nid_t nid)
{
- struct nx_info *nxi;
-
- list_for_each_entry(nxi, &nx_infos, nx_list)
- if (nxi->nx_id == id)
- return nxi;
- return 0;
+ return (nid % NX_HASH_SIZE);
}
-/*
- * struct nx_info ref stuff
- */
-struct nx_info *find_nx_info(int id)
+/* __hash_nx_info()
+
+ * add the nxi to the global hash table
+ * requires the hash_lock to be held */
+
+static inline void __hash_nx_info(struct nx_info *nxi)
{
- struct nx_info *nxi;
+ struct hlist_head *head;
- if (id < 0) {
- nxi = current->nx_info;
- get_nx_info(nxi);
- } else {
- spin_lock(&nxlist_lock);
- if ((nxi = __find_nx_info(id)))
- get_nx_info(nxi);
- spin_unlock(&nxlist_lock);
- }
- return nxi;
+ nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+ get_nx_info(nxi);
+ head = &nx_info_hash[__hashval(nxi->nx_id)];
+ hlist_add_head_rcu(&nxi->nx_hlist, head);
}
-/*
- * verify that id is a valid nid
- */
+/* __unhash_nx_info()
+
+ * remove the nxi from the global hash table
+ * requires the hash_lock to be held */
-int nx_info_id_valid(int id)
+static inline void __unhash_nx_info(struct nx_info *nxi)
{
- int valid;
-
- spin_lock(&nxlist_lock);
- valid = (__find_nx_info(id) != NULL);
- spin_unlock(&nxlist_lock);
- return valid;
+ nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+ hlist_del_rcu(&nxi->nx_hlist);
+ put_nx_info(nxi);
}
-/*
- * dynamic context id ...
- */
+/* __lookup_nx_info()
-static __inline__ nid_t __nx_dynamic_id(void)
+ * requires the rcu_read_lock()
+ * doesn't increment the nx_refcnt */
+
+static inline struct nx_info *__lookup_nx_info(nid_t nid)
+{
+	/*
+	 * Find the nx_info with id @nid in its hash chain.
+	 *
+	 * Returns the matching entry or NULL.  No reference is taken;
+	 * the caller does the get() if it wants to keep the result.
+	 * Callers in this file hold either rcu_read_lock() or the
+	 * nx_info_hash_lock around the call.
+	 */
+	struct hlist_head *head = &nx_info_hash[__hashval(nid)];
+	struct hlist_node *pos;
+
+	for (pos = head->first; pos; pos = pos->next) {
+		struct nx_info *nxi;
+
+		/*
+		 * Entries are published with hlist_add_head_rcu(), so a
+		 * lockless reader must order the load of the node pointer
+		 * before the loads through it.
+		 */
+		smp_read_barrier_depends();
+
+		nxi = hlist_entry(pos, struct nx_info, nx_hlist);
+		if (nxi->nx_id == nid)
+			return nxi;
+	}
+	return NULL;
+}
+
+
+/* __nx_dynamic_id()
+
+ * find unused dynamic nid
+ * requires the hash_lock to be held */
+
+static inline nid_t __nx_dynamic_id(void)
{
static nid_t seq = MAX_N_CONTEXT;
nid_t barrier = seq;
do {
if (++seq > MAX_N_CONTEXT)
seq = MIN_D_CONTEXT;
- if (!__find_nx_info(seq))
+ if (!__lookup_nx_info(seq))
return seq;
} while (barrier != seq);
return 0;
}
-static struct nx_info * __foc_nx_info(int id, int *err)
+/* __loc_nx_info()
+
+ * locate or create the requested context
+ * get() it and if new hash it */
+
+static struct nx_info * __loc_nx_info(int id, int *err)
{
struct nx_info *new, *nxi = NULL;
- nxdprintk("foc_nx_info(%d)\n", id);
- // if (!(new = alloc_nx_info(id))) {
- if (!(new = alloc_nx_info())) {
+ nxdprintk("loc_nx_info(%d)\n", id);
+
+ if (!(new = __alloc_nx_info(id))) {
*err = -ENOMEM;
return NULL;
}
- spin_lock(&nxlist_lock);
+ spin_lock(&nx_info_hash_lock);
/* dynamic context requested */
- if (id == IP_DYNAMIC_ID) {
+ if (id == NX_DYNAMIC_ID) {
id = __nx_dynamic_id();
if (!id) {
printk(KERN_ERR "no dynamic context available.\n");
new->nx_id = id;
}
/* existing context requested */
- else if ((nxi = __find_nx_info(id))) {
+ else if ((nxi = __lookup_nx_info(id))) {
/* context in setup is not available */
if (nxi->nx_flags & VXF_STATE_SETUP) {
- nxdprintk("foc_nx_info(%d) = %p (not available)\n", id, nxi);
+ nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi);
nxi = NULL;
*err = -EBUSY;
} else {
- nxdprintk("foc_nx_info(%d) = %p (found)\n", id, nxi);
+ nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi);
get_nx_info(nxi);
*err = 0;
}
}
/* new context requested */
- nxdprintk("foc_nx_info(%d) = %p (new)\n", id, new);
- atomic_set(&new->nx_refcount, 1);
- list_add(&new->nx_list, &nx_infos);
+ nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new);
+ __hash_nx_info(get_nx_info(new));
nxi = new, new = NULL;
*err = 1;
out_unlock:
- spin_unlock(&nxlist_lock);
+ spin_unlock(&nx_info_hash_lock);
if (new)
- free_nx_info(new);
+ __dealloc_nx_info(new);
+ return nxi;
+}
+
+
+
+/* exported stuff */
+
+
+
+
+/* rcu_free_nx_info()
+
+	* dispose of the nx_info passed as an opaque pointer
+	* both counters must be non-negative (BUG otherwise)
+	* the struct is only released when nx_usecnt is zero,
+	* otherwise a message is logged and nothing is freed */
+
+void rcu_free_nx_info(void *obj)
+{
+	struct nx_info *nxi = obj;
+	int users, refs;
+
+	users = atomic_read(&nxi->nx_usecnt);
+	BUG_ON(users < 0);
+
+	refs = atomic_read(&nxi->nx_refcnt);
+	BUG_ON(refs < 0);
+
+	if (users) {
+		/* still in use: leave the struct alone */
+		printk("!!! rcu didn't free\n");
+		return;
+	}
+	__dealloc_nx_info(nxi);
+}
+
+/* unhash_nx_info()
+
+	* locked wrapper around __unhash_nx_info()
+	* takes nx_info_hash_lock, removes the nxi from the
+	* global hash table (which also drops one reference
+	* via put_nx_info) and releases the lock again */
+
+void unhash_nx_info(struct nx_info *nxi)
+{
+	spin_lock(&nx_info_hash_lock);
+	__unhash_nx_info(nxi);
+	spin_unlock(&nx_info_hash_lock);
+}
+
+/* locate_nx_info()
+
+	* look up the nx_info for the given id and get() it
+	* a negative id selects the nx_info of the current task
+	* otherwise the hash is searched under rcu_read_lock()
+	* and whatever the lookup yields is handed to get_nx_info() */
+
+struct nx_info *locate_nx_info(int id)
+{
+	struct nx_info *nxi;
+
+	if (id >= 0) {
+		/* hash lookup is only valid inside the RCU read section */
+		rcu_read_lock();
+		nxi = get_nx_info(__lookup_nx_info(id));
+		rcu_read_unlock();
+	} else {
+		nxi = get_nx_info(current->nx_info);
+	}
return nxi;
}
+/* nx_info_is_hashed()
+
+	* check whether an nx_info with this nid is in the hash
+	* returns 1 if the lookup finds an entry, 0 otherwise
+	* the entry itself is neither referenced nor touched */
+
+int nx_info_is_hashed(nid_t nid)
+{
+	struct nx_info *found;
+
+	rcu_read_lock();
+	found = __lookup_nx_info(nid);
+	rcu_read_unlock();
+
+	/* only the pointer value is tested after leaving the read section */
+	return found ? 1 : 0;
+}
+
+#ifdef CONFIG_VSERVER_LEGACY
-struct nx_info *find_or_create_nx_info(int id)
+struct nx_info *locate_or_create_nx_info(int id)
{
int err;
- return __foc_nx_info(id, &err);
+ return __loc_nx_info(id, &err);
}
+/* create_nx_info()
+
+	* request a context with a dynamically chosen id
+	* thin wrapper around __loc_nx_info(NX_DYNAMIC_ID, ...)
+	* returns its result (NULL on failure); the error/status
+	* value reported through the second argument is discarded */
+
+struct nx_info *create_nx_info(void)
+{
+	int status;
+
+	nxdprintk("create_nx_info()\n");
+	return __loc_nx_info(NX_DYNAMIC_ID, &status);
+}
+
+
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+/* forward walk over one hash chain for lockless readers:
+	* prefetches the next node and issues a data-dependency
+	* barrier after every step to the next node */
+#define hlist_for_each_rcu(pos, head) \
+	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
+		pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
+
+/* get_nid_list()
+
+	* copy the ids of hashed nx_infos into nids[]
+	* index: number of leading entries to pass over (see note)
+	* nids:  output array, must hold at least size elements
+	* size:  capacity of nids[]; nothing is stored if <= 0
+	* returns the number of ids written (0 .. size)
+	* walks all hash buckets under rcu_read_lock() */
+
+int get_nid_list(int index, unsigned int *nids, int size)
+{
+	int hindex, nr_nids = 0;
+
+	/* the loop stores an id before it compares against size,
+	 * so a non-positive size has to be rejected up front */
+	if (size <= 0)
+		return 0;
+
+	rcu_read_lock();
+	for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
+		struct hlist_head *head = &nx_info_hash[hindex];
+		struct hlist_node *pos;
+
+		hlist_for_each_rcu(pos, head) {
+			struct nx_info *nxi;
+
+			/* NOTE(review): with this pre-decrement test,
+			 * index 0 and index 1 both start at the first
+			 * entry -- confirm what the callers expect */
+			if (--index > 0)
+				continue;
+
+			nxi = hlist_entry(pos, struct nx_info, nx_hlist);
+			nids[nr_nids] = nxi->nx_id;
+			if (++nr_nids >= size)
+				goto out;
+		}
+	}
+out:
+	rcu_read_unlock();
+	return nr_nids;
+}
+#endif
+
+
/*
* migrate task to new network
*/
if (!p || !nxi)
BUG();
- nxdprintk("nx_migrate_task(%p,%p[#%d.%d)\n", p, nxi,
- nxi->nx_id, atomic_read(&nxi->nx_refcount));
+ nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n",
+ p, nxi, nxi->nx_id,
+ atomic_read(&nxi->nx_usecnt),
+ atomic_read(&nxi->nx_refcnt));
if (old_nxi == nxi)
goto out;
task_lock(p);
+ /* should be handled in set_nx_info !! */
+ if (old_nxi)
+ clr_nx_info(&p->nx_info);
set_nx_info(&p->nx_info, nxi);
p->nid = nxi->nx_id;
task_unlock(p);
- put_nx_info(old_nxi);
+ // put_nx_info(old_nxi);
out:
put_nx_info(old_nxi);
return ret;
int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
{
- if (!nxi)
- return 1;
-
- return __addr_in_nx_info(ifa->ifa_address, nxi);
+ if (nxi && ifa)
+ return __addr_in_nx_info(ifa->ifa_address, nxi);
+ return 1;
}
int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RESOURCE))
return -EPERM;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
if (nid < 1)
return -EINVAL;
- new_nxi = __foc_nx_info(nid, &ret);
+ new_nxi = __loc_nx_info(nid, &ret);
if (!new_nxi)
return ret;
if (!(new_nxi->nx_flags & VXF_STATE_SETUP)) {
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
nx_migrate_task(current, nxi);
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
vc_data.flagword = nxi->nx_flags;
- // vc_data.mask = ~0UL;
/* special STATE flag handling */
vc_data.mask = vx_mask_flags(~0UL, nxi->nx_flags, IPF_ONE_TIME);
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
if (copy_from_user (&vc_data, data, sizeof(vc_data)))
return -EFAULT;
- nxi = find_nx_info(id);
+ nxi = locate_nx_info(id);
if (!nxi)
return -ESRCH;
#include <linux/module.h>
-EXPORT_SYMBOL_GPL(free_nx_info);
-EXPORT_SYMBOL_GPL(nxlist_lock);
+EXPORT_SYMBOL_GPL(rcu_free_nx_info);
+EXPORT_SYMBOL_GPL(nx_info_hash_lock);