*
* Virtual Server: Network Support
*
- * Copyright (C) 2003-2004 Herbert Pötzl
+ * Copyright (C) 2003-2005 Herbert Pötzl
*
* V0.01 broken out from vcontext V0.05
* V0.02 cleaned up implementation
#include <linux/config.h>
#include <linux/slab.h>
-#include <linux/vserver.h>
-#include <linux/vs_base.h>
-#include <linux/vs_network.h>
+#include <linux/vserver/network_cmd.h>
#include <linux/rcupdate.h>
+#include <net/tcp.h>
#include <asm/errno.h>
static struct nx_info *__alloc_nx_info(nid_t nid)
{
struct nx_info *new = NULL;
-
- nxdprintk("alloc_nx_info()\n");
+
+ vxdprintk(VXD_CBIT(nid, 1), "alloc_nx_info(%d)*", nid);
/* would this benefit from a slab cache? */
new = kmalloc(sizeof(struct nx_info), GFP_KERNEL);
if (!new)
return 0;
-
+
memset (new, 0, sizeof(struct nx_info));
new->nx_id = nid;
INIT_RCU_HEAD(&new->nx_rcu);
atomic_set(&new->nx_usecnt, 0);
/* rest of init goes here */
-
- nxdprintk("alloc_nx_info() = %p\n", new);
+
+ vxdprintk(VXD_CBIT(nid, 0),
+ "alloc_nx_info() = %p", new);
return new;
}
static void __dealloc_nx_info(struct nx_info *nxi)
{
- nxdprintk("dealloc_nx_info(%p)\n", nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "dealloc_nx_info(%p)", nxi);
nxi->nx_hlist.next = LIST_POISON1;
nxi->nx_id = -1;
-
+
BUG_ON(atomic_read(&nxi->nx_usecnt));
BUG_ON(atomic_read(&nxi->nx_refcnt));
kfree(nxi);
}
+static inline int __free_nx_info(struct nx_info *nxi)
+{
+ int usecnt, refcnt;
+
+ BUG_ON(!nxi);
+
+ usecnt = atomic_read(&nxi->nx_usecnt);
+ BUG_ON(usecnt < 0);
+
+ refcnt = atomic_read(&nxi->nx_refcnt);
+ BUG_ON(refcnt < 0);
+
+ if (!usecnt)
+ __dealloc_nx_info(nxi);
+ return usecnt;
+}
+
+/* exported stuff */
+
+void free_nx_info(struct nx_info *nxi)
+{
+ /* context shutdown is mandatory */
+ // BUG_ON(nxi->nx_state != NXS_SHUTDOWN);
+
+ // BUG_ON(nxi->nx_state & NXS_HASHED);
+
+ BUG_ON(__free_nx_info(nxi));
+}
+
/* hash table for nx_info hash */
-#define NX_HASH_SIZE 13
+#define NX_HASH_SIZE 13
struct hlist_head nx_info_hash[NX_HASH_SIZE];
static inline void __hash_nx_info(struct nx_info *nxi)
{
struct hlist_head *head;
-
- nxdprintk("__hash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
+
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__hash_nx_info: %p[#%d]", nxi, nxi->nx_id);
get_nx_info(nxi);
head = &nx_info_hash[__hashval(nxi->nx_id)];
- hlist_add_head_rcu(&nxi->nx_hlist, head);
+ hlist_add_head(&nxi->nx_hlist, head);
}
/* __unhash_nx_info()
static inline void __unhash_nx_info(struct nx_info *nxi)
{
- nxdprintk("__unhash_nx_info: %p[#%d]\n", nxi, nxi->nx_id);
- hlist_del_rcu(&nxi->nx_hlist);
+ vxd_assert_lock(&nx_info_hash_lock);
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__unhash_nx_info: %p[#%d]", nxi, nxi->nx_id);
+ hlist_del(&nxi->nx_hlist);
put_nx_info(nxi);
}
/* __lookup_nx_info()
- * requires the rcu_read_lock()
+ * requires the hash_lock to be held
* doesn't increment the nx_refcnt */
static inline struct nx_info *__lookup_nx_info(nid_t nid)
struct hlist_head *head = &nx_info_hash[__hashval(nid)];
struct hlist_node *pos;
- hlist_for_each_rcu(pos, head) {
+ vxd_assert_lock(&nx_info_hash_lock);
+ hlist_for_each(pos, head) {
struct nx_info *nxi =
hlist_entry(pos, struct nx_info, nx_hlist);
{
static nid_t seq = MAX_N_CONTEXT;
nid_t barrier = seq;
-
+
+ vxd_assert_lock(&nx_info_hash_lock);
do {
if (++seq > MAX_N_CONTEXT)
seq = MIN_D_CONTEXT;
- if (!__lookup_nx_info(seq))
+ if (!__lookup_nx_info(seq)) {
+ vxdprintk(VXD_CBIT(nid, 4),
+ "__nx_dynamic_id: [#%d]", seq);
return seq;
+ }
} while (barrier != seq);
return 0;
}
static struct nx_info * __loc_nx_info(int id, int *err)
{
struct nx_info *new, *nxi = NULL;
-
- nxdprintk("loc_nx_info(%d)\n", id);
+
+ vxdprintk(VXD_CBIT(nid, 1), "loc_nx_info(%d)*", id);
if (!(new = __alloc_nx_info(id))) {
*err = -ENOMEM;
return NULL;
}
+ /* required to make dynamic xids unique */
spin_lock(&nx_info_hash_lock);
/* dynamic context requested */
else if ((nxi = __lookup_nx_info(id))) {
/* context in setup is not available */
if (nxi->nx_flags & VXF_STATE_SETUP) {
- nxdprintk("loc_nx_info(%d) = %p (not available)\n", id, nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (not available)", id, nxi);
nxi = NULL;
*err = -EBUSY;
} else {
- nxdprintk("loc_nx_info(%d) = %p (found)\n", id, nxi);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (found)", id, nxi);
get_nx_info(nxi);
*err = 0;
}
}
/* new context requested */
- nxdprintk("loc_nx_info(%d) = %p (new)\n", id, new);
+ vxdprintk(VXD_CBIT(nid, 0),
+ "loc_nx_info(%d) = %p (new)", id, new);
__hash_nx_info(get_nx_info(new));
nxi = new, new = NULL;
*err = 1;
/* exported stuff */
-
-
-void rcu_free_nx_info(void *obj)
-{
- struct nx_info *nxi = obj;
- int usecnt, refcnt;
-
- usecnt = atomic_read(&nxi->nx_usecnt);
- BUG_ON(usecnt < 0);
-
- refcnt = atomic_read(&nxi->nx_refcnt);
- BUG_ON(refcnt < 0);
-
- if (!usecnt)
- __dealloc_nx_info(nxi);
- else
- printk("!!! rcu didn't free\n");
-}
-
void unhash_nx_info(struct nx_info *nxi)
{
spin_lock(&nx_info_hash_lock);
/* locate_nx_info()
- * search for a nx_info and get() it
+ * search for a nx_info and get() it
* negative id means current */
struct nx_info *locate_nx_info(int id)
{
struct nx_info *nxi;
-
+
if (id < 0) {
nxi = get_nx_info(current->nx_info);
} else {
- rcu_read_lock();
+ spin_lock(&nx_info_hash_lock);
nxi = get_nx_info(__lookup_nx_info(id));
- rcu_read_unlock();
+ spin_unlock(&nx_info_hash_lock);
}
return nxi;
}
-/* nx_info_is_hashed()
+/* nid_is_hashed()
* verify that nid is still hashed */
-int nx_info_is_hashed(nid_t nid)
+int nid_is_hashed(nid_t nid)
{
int hashed;
- rcu_read_lock();
+ spin_lock(&nx_info_hash_lock);
hashed = (__lookup_nx_info(nid) != NULL);
- rcu_read_unlock();
+ spin_unlock(&nx_info_hash_lock);
return hashed;
}
-#ifdef CONFIG_VSERVER_LEGACY
+#ifdef CONFIG_VSERVER_LEGACYNET
struct nx_info *locate_or_create_nx_info(int id)
{
{
struct nx_info *new;
int err;
-
- nxdprintk("create_nx_info()\n");
+
+ vxdprintk(VXD_CBIT(nid, 5), "create_nx_info(%s)", "void");
if (!(new = __loc_nx_info(NX_DYNAMIC_ID, &err)))
return NULL;
return new;
#ifdef CONFIG_PROC_FS
-#define hlist_for_each_rcu(pos, head) \
- for (pos = (head)->first; pos && ({ prefetch(pos->next); 1;}); \
- pos = pos->next, ({ smp_read_barrier_depends(); 0;}))
-
int get_nid_list(int index, unsigned int *nids, int size)
{
int hindex, nr_nids = 0;
- rcu_read_lock();
for (hindex = 0; hindex < NX_HASH_SIZE; hindex++) {
struct hlist_head *head = &nx_info_hash[hindex];
struct hlist_node *pos;
- hlist_for_each_rcu(pos, head) {
+ spin_lock(&nx_info_hash_lock);
+ hlist_for_each(pos, head) {
struct nx_info *nxi;
if (--index > 0)
continue;
nxi = hlist_entry(pos, struct nx_info, nx_hlist);
- nids[nr_nids] = nxi->nx_id;
- if (++nr_nids >= size)
+ nids[nr_nids] = nxi->nx_id;
+ if (++nr_nids >= size) {
+ spin_unlock(&nx_info_hash_lock);
goto out;
+ }
}
+ /* keep the lock time short */
+ spin_unlock(&nx_info_hash_lock);
}
out:
- rcu_read_unlock();
return nr_nids;
}
#endif
{
struct nx_info *old_nxi;
int ret = 0;
-
+
if (!p || !nxi)
BUG();
- nxdprintk("nx_migrate_task(%p,%p[#%d.%d.%d])\n",
+ vxdprintk(VXD_CBIT(nid, 5),
+ "nx_migrate_task(%p,%p[#%d.%d.%d])",
p, nxi, nxi->nx_id,
atomic_read(&nxi->nx_usecnt),
atomic_read(&nxi->nx_refcnt));
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
-static inline int __addr_in_nx_info(u32 addr, struct nx_info *nxi)
-{
- int i, nbip;
-
- nbip = nxi->nbipv4;
- for (i=0; i<nbip; i++)
- if (nxi->ipv4[i] == addr)
- return 1;
- return 0;
-}
int ifa_in_nx_info(struct in_ifaddr *ifa, struct nx_info *nxi)
{
- if (nxi && ifa)
- return __addr_in_nx_info(ifa->ifa_address, nxi);
- return 1;
+ if (!nxi)
+ return 1;
+ if (!ifa)
+ return 0;
+ return addr_in_nx_info(nxi, ifa->ifa_address);
}
int dev_in_nx_info(struct net_device *dev, struct nx_info *nxi)
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (__addr_in_nx_info(ifa->ifa_address, nxi))
+ if (addr_in_nx_info(nxi, ifa->ifa_address))
return 1;
}
return 0;
}
+/*
+ * check if address is covered by socket
+ *
+ * sk: the socket to check against
+ * addr: the address in question (must be != 0)
+ */
+static inline int __addr_in_socket(struct sock *sk, uint32_t addr)
+{
+ struct nx_info *nxi = sk->sk_nx_info;
+ uint32_t saddr = tcp_v4_rcv_saddr(sk);
+
+ vxdprintk(VXD_CBIT(net, 5),
+ "__addr_in_socket(%p,%d.%d.%d.%d) %p:%d.%d.%d.%d %p;%lx",
+ sk, VXD_QUAD(addr), nxi, VXD_QUAD(saddr), sk->sk_socket,
+ (sk->sk_socket?sk->sk_socket->flags:0));
+
+ if (saddr) {
+ /* direct address match */
+ return (saddr == addr);
+ } else if (nxi) {
+ /* match against nx_info */
+ return addr_in_nx_info(nxi, addr);
+ } else {
+ /* unrestricted any socket */
+ return 1;
+ }
+}
+
+int nx_addr_conflict(struct nx_info *nxi, uint32_t addr, struct sock *sk)
+{
+ vxdprintk(VXD_CBIT(net, 2),
+		"nx_addr_conflict(%p,%p) %d.%d.%d.%d",
+ nxi, sk, VXD_QUAD(addr));
+
+ if (addr) {
+ /* check real address */
+ return __addr_in_socket(sk, addr);
+ } else if (nxi) {
+ /* check against nx_info */
+ int i, n = nxi->nbipv4;
+
+ for (i=0; i<n; i++)
+ if (__addr_in_socket(sk, nxi->ipv4[i]))
+ return 1;
+ return 0;
+ } else {
+ /* check against any */
+ return 1;
+ }
+}
/* vserver syscall commands below here */
int vc_task_nid(uint32_t id, void __user *data)
{
- nid_t nid;
-
- if (id) {
- struct task_struct *tsk;
-
- if (!vx_check(0, VX_ADMIN|VX_WATCH))
- return -EPERM;
-
- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(id);
- nid = (tsk) ? tsk->nid : -ESRCH;
- read_unlock(&tasklist_lock);
- }
- else
- nid = current->nid;
- return nid;
+ nid_t nid;
+
+ if (id) {
+ struct task_struct *tsk;
+
+ if (!vx_check(0, VX_ADMIN|VX_WATCH))
+ return -EPERM;
+
+ read_lock(&tasklist_lock);
+ tsk = find_task_by_real_pid(id);
+ nid = (tsk) ? tsk->nid : -ESRCH;
+ read_unlock(&tasklist_lock);
+ }
+ else
+ nid = current->nid;
+ return nid;
}
int vc_net_create(uint32_t nid, void __user *data)
{
- // int ret = -ENOMEM;
+ // int ret = -ENOMEM;
struct nx_info *new_nxi;
int ret;
int vc_net_migrate(uint32_t id, void __user *data)
{
struct nx_info *nxi;
-
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
#include <linux/module.h>
-EXPORT_SYMBOL_GPL(rcu_free_nx_info);
-EXPORT_SYMBOL_GPL(nx_info_hash_lock);
+EXPORT_SYMBOL_GPL(free_nx_info);
EXPORT_SYMBOL_GPL(unhash_nx_info);